{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> 梁善宜 181013114"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 对猎聘PC版进行数据爬取\n",
    "* [猎聘PC版](https://www.liepin.com/zhaopin/) 以“产品经理”关键词为例。\n",
    "- 对公司、行业、城市三个不同参数进行数据（文本、链接）的抓取。\n",
    "- 解析url参数，并建构参数模版。\n",
    "- 创建payload（翻页）模版。\n",
    "- xpath解析页面数据。\n",
    "- 翻页（可参考之前翻页参数模版）。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[<Element 'div' class=('search-conditions',) data-selector='search-conditions'>]\n",
      "<Element 'div' class=('search-conditions',) data-selector='search-conditions'>\n",
      "[<Element 'dt' class=('search-title',)>, <Element 'dt' class=('search-title',)>, <Element 'dt' class=('search-title',)>, <Element 'dt' class=('search-title',)>, <Element 'dt' class=('search-title',)>]\n",
      "公司：\n",
      "行业：\n",
      "城市：\n",
      "薪资：\n",
      "更多：\n",
      "<Element 'dd' class=('comp-list',)>\n",
      "<Element 'dd' class=('short-dd', 'select-industry') data-param='industries'>\n",
      "<Element 'dd' data-param='city'>\n",
      "<Element 'dd' data-param='salary'>\n",
      "<Element 'dd' class=('dropdown', 'dropdown-time')>\n",
      "<Element 'dd' class=('dropdown', 'dropdown-jobkind')>\n",
      "<Element 'dd' class=('dropdown', 'dropdown-compscale')>\n",
      "<Element 'dd' class=('dropdown', 'dropdown-compkind')>\n"
     ]
    }
   ],
   "source": [
    "# Scrape the text labels of the filter bar on the search-results page\n",
    "import pandas as pd\n",
    "from requests_html import HTMLSession\n",
    "\n",
    "url = \"https://www.liepin.com/zhaopin/?keyword=产品经理\"\n",
    "session = HTMLSession()\n",
    "r = session.get(url)\n",
    "\n",
    "# The filter bar is the div tagged data-selector=\"search-conditions\"\n",
    "主要元素 = r.html.xpath('//div[@data-selector=\"search-conditions\"]')\n",
    "筛选栏 = 主要元素[0]\n",
    "print(主要元素)\n",
    "print(筛选栏)\n",
    "\n",
    "# Each <dt class=\"search-title\"> holds the label of one filter row\n",
    "list_search_title = 筛选栏.xpath('//dt[@class=\"search-title\"]')\n",
    "print(list_search_title)\n",
    "for 标题 in list_search_title:\n",
    "    print(标题.text)\n",
    "\n",
    "# The <dd> siblings that follow the titles\n",
    "list_search_dd = 筛选栏.xpath('//dt[@class=\"search-title\"]/following-sibling::dd')\n",
    "for 选项 in list_search_dd:\n",
    "    print(选项)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 公司的爬取"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'中国500强': '/zhaopin/?init=-1&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&compTag=155&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '2018互联网300强': '/zhaopin/?init=-1&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&compTag=182&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '制造业500强': '/zhaopin/?init=-1&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&compTag=186&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " 'AI创新成长50强 ': '/zhaopin/?init=-1&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&compTag=189&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '独角兽': '/zhaopin/?init=-1&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&compTag=130&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '上市公司': '/zhaopin/?init=-1&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&compTag=156&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d'}"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Scrape the company filter options (link text and href).\n",
    "# The hot-company tags are looked up under the first <dd> that follows the filter titles.\n",
    "公司标签元素 = (\n",
    "    r.html.xpath('//div[@data-selector=\"search-conditions\"]')[0]\n",
    "    .xpath('//dt[@class=\"search-title\"]/following-sibling::dd')[0]\n",
    "    .xpath('//div[contains(@class,\"hot-comp-tags\")]/a')\n",
    ")\n",
    "\n",
    "# Map each tag's display text to its href; the element list keeps its own name\n",
    "# so this dict is never confused with the raw elements on a re-run.\n",
    "公司数据选择器链结 = {x.xpath(\"a/text()\")[0]: x.xpath(\"a/@href\")[0] for x in 公司标签元素}\n",
    "公司数据选择器链结"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### urllib模块功能介绍\n",
    "* urlparse \n",
    "返回的6个部分，分别是：scheme(机制)、netloc(网络位置)、path(路径)、params(路径段参数)、query(查询)、fragment(片段)。\n",
    "* parse_qs\n",
    "返回query(查询)多个部分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 6 entries, 0 to 5\n",
      "Data columns (total 6 columns):\n",
      " #   Column    Non-Null Count  Dtype \n",
      "---  ------    --------------  ----- \n",
      " 0   scheme    6 non-null      object\n",
      " 1   netloc    6 non-null      object\n",
      " 2   path      6 non-null      object\n",
      " 3   params    6 non-null      object\n",
      " 4   query     6 non-null      object\n",
      " 5   fragment  6 non-null      object\n",
      "dtypes: object(6)\n",
      "memory usage: 416.0+ bytes\n",
      "scheme      1\n",
      "netloc      1\n",
      "path        1\n",
      "params      1\n",
      "query       6\n",
      "fragment    1\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>scheme</th>\n",
       "      <th>netloc</th>\n",
       "      <th>path</th>\n",
       "      <th>params</th>\n",
       "      <th>query</th>\n",
       "      <th>fragment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=a2c857af6f47c165&amp;flushckid=1&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=a2c857af6f47c165&amp;flushckid=1&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=a2c857af6f47c165&amp;flushckid=1&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=a2c857af6f47c165&amp;flushckid=1&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=a2c857af6f47c165&amp;flushckid=1&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  scheme netloc       path params  \\\n",
       "0                /zhaopin/          \n",
       "1                /zhaopin/          \n",
       "2                /zhaopin/          \n",
       "3                /zhaopin/          \n",
       "4                /zhaopin/          \n",
       "\n",
       "                                               query fragment  \n",
       "0  init=-1&headckid=a2c857af6f47c165&flushckid=1&...           \n",
       "1  init=-1&headckid=a2c857af6f47c165&flushckid=1&...           \n",
       "2  init=-1&headckid=a2c857af6f47c165&flushckid=1&...           \n",
       "3  init=-1&headckid=a2c857af6f47c165&flushckid=1&...           \n",
       "4  init=-1&headckid=a2c857af6f47c165&flushckid=1&...           "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Split every scraped company link into its URL components with urllib.parse\n",
    "\n",
    "from urllib.parse import urlparse, parse_qs # see the urllib notes in the markdown cell above\n",
    "公司链接列表 = list(公司数据选择器链结.values())\n",
    "df = pd.DataFrame([urlparse(链接) for 链接 in 公司链接列表])\n",
    "df.info()\n",
    "# nunique() counts the distinct values per column, i.e. which URL parts vary between links\n",
    "print(df.nunique())\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "init             1\n",
      "headckid         1\n",
      "flushckid        1\n",
      "fromSearchBtn    1\n",
      "keyword          1\n",
      "compTag          6\n",
      "ckid             1\n",
      "siTag            1\n",
      "d_sfrom          1\n",
      "d_ckId           1\n",
      "d_curPage        1\n",
      "d_pageSize       1\n",
      "d_headId         1\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>init</th>\n",
       "      <th>headckid</th>\n",
       "      <th>flushckid</th>\n",
       "      <th>fromSearchBtn</th>\n",
       "      <th>keyword</th>\n",
       "      <th>compTag</th>\n",
       "      <th>ckid</th>\n",
       "      <th>siTag</th>\n",
       "      <th>d_sfrom</th>\n",
       "      <th>d_ckId</th>\n",
       "      <th>d_curPage</th>\n",
       "      <th>d_pageSize</th>\n",
       "      <th>d_headId</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1</td>\n",
       "      <td>ae5d83c90e7c8cd4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>产品经理</td>\n",
       "      <td>155</td>\n",
       "      <td>ae5d83c90e7c8cd4</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>b034127dbb003a0d287008095ccd9c63</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>b034127dbb003a0d287008095ccd9c63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-1</td>\n",
       "      <td>ae5d83c90e7c8cd4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>产品经理</td>\n",
       "      <td>182</td>\n",
       "      <td>ae5d83c90e7c8cd4</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>b034127dbb003a0d287008095ccd9c63</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>b034127dbb003a0d287008095ccd9c63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-1</td>\n",
       "      <td>ae5d83c90e7c8cd4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>产品经理</td>\n",
       "      <td>186</td>\n",
       "      <td>ae5d83c90e7c8cd4</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>b034127dbb003a0d287008095ccd9c63</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>b034127dbb003a0d287008095ccd9c63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-1</td>\n",
       "      <td>ae5d83c90e7c8cd4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>产品经理</td>\n",
       "      <td>189</td>\n",
       "      <td>ae5d83c90e7c8cd4</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>b034127dbb003a0d287008095ccd9c63</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>b034127dbb003a0d287008095ccd9c63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-1</td>\n",
       "      <td>ae5d83c90e7c8cd4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>产品经理</td>\n",
       "      <td>130</td>\n",
       "      <td>ae5d83c90e7c8cd4</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>b034127dbb003a0d287008095ccd9c63</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>b034127dbb003a0d287008095ccd9c63</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  init          headckid flushckid fromSearchBtn keyword compTag  \\\n",
       "0   -1  ae5d83c90e7c8cd4         1             2    产品经理     155   \n",
       "1   -1  ae5d83c90e7c8cd4         1             2    产品经理     182   \n",
       "2   -1  ae5d83c90e7c8cd4         1             2    产品经理     186   \n",
       "3   -1  ae5d83c90e7c8cd4         1             2    产品经理     189   \n",
       "4   -1  ae5d83c90e7c8cd4         1             2    产品经理     130   \n",
       "\n",
       "               ckid                                          siTag  \\\n",
       "0  ae5d83c90e7c8cd4  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw   \n",
       "1  ae5d83c90e7c8cd4  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw   \n",
       "2  ae5d83c90e7c8cd4  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw   \n",
       "3  ae5d83c90e7c8cd4  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw   \n",
       "4  ae5d83c90e7c8cd4  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw   \n",
       "\n",
       "          d_sfrom                            d_ckId d_curPage d_pageSize  \\\n",
       "0  search_unknown  b034127dbb003a0d287008095ccd9c63         0         40   \n",
       "1  search_unknown  b034127dbb003a0d287008095ccd9c63         0         40   \n",
       "2  search_unknown  b034127dbb003a0d287008095ccd9c63         0         40   \n",
       "3  search_unknown  b034127dbb003a0d287008095ccd9c63         0         40   \n",
       "4  search_unknown  b034127dbb003a0d287008095ccd9c63         0         40   \n",
       "\n",
       "                           d_headId  \n",
       "0  b034127dbb003a0d287008095ccd9c63  \n",
       "1  b034127dbb003a0d287008095ccd9c63  \n",
       "2  b034127dbb003a0d287008095ccd9c63  \n",
       "3  b034127dbb003a0d287008095ccd9c63  \n",
       "4  b034127dbb003a0d287008095ccd9c63  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Only the query column differs between the parsed URLs, so break the query string down further.\n",
    "# parse_qs returns a list of values per key; keep just the first value so each cell holds a scalar.\n",
    "查询参数列表 = []\n",
    "for 查询串 in df['query']:\n",
    "    查询参数列表.append({k: v[0] for k, v in parse_qs(查询串).items()})\n",
    "\n",
    "df_qs = pd.DataFrame(查询参数列表)\n",
    "print(df_qs.nunique())\n",
    "# Tip: df_qs[['keyword','compTag']] selects only those two columns\n",
    "df_qs.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 小结\n",
    "* compTag 是不同的公司选择器, 数值不同, 对应到不同类型的公司\n",
    "* keyword 是搜索关键字"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'init': ['-1'], 'headckid': ['ae5d83c90e7c8cd4'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['产品经理'], 'compTag': ['155'], 'ckid': ['ae5d83c90e7c8cd4'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['b034127dbb003a0d287008095ccd9c63'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['b034127dbb003a0d287008095ccd9c63']}\n",
      "{'中国500强': '155', '2018互联网300强': '182', '制造业500强': '186', 'AI创新成长50强 ': '189', '独角兽': '130', '上市公司': '156'}\n"
     ]
    }
   ],
   "source": [
    "# Build the company query-parameter template and the label -> compTag dictionary\n",
    "def parse_url_qs_for_compTag (url):\n",
    "    \"\"\"Parse the query string of a URL into a dict of value lists.\n",
    "\n",
    "    Args:\n",
    "        url: URL string whose query part is parsed.\n",
    "\n",
    "    Returns:\n",
    "        dict mapping each query key to a list of its values (the parse_qs result).\n",
    "    \"\"\"\n",
    "    six_parts = urlparse(url)\n",
    "    return parse_qs(six_parts.query)\n",
    "\n",
    "# 1. The parsed query of the first company link serves as the parameter template\n",
    "参数模板 = parse_url_qs_for_compTag(list(公司数据选择器链结.values())[0])\n",
    "print(参数模板)\n",
    "\n",
    "# 2. Map each company label to its compTag value; parse_qs yields lists, so take element 0\n",
    "字典_compTag = {k: parse_url_qs_for_compTag(v)['compTag'][0] for k, v in 公司数据选择器链结.items()}\n",
    "print(字典_compTag)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'中国500强': {'init': ['-1'], 'headckid': ['ae5d83c90e7c8cd4'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['用户体验'], 'compTag': ['155'], 'ckid': ['ae5d83c90e7c8cd4'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['b034127dbb003a0d287008095ccd9c63'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['b034127dbb003a0d287008095ccd9c63']}, '2018互联网300强': {'init': ['-1'], 'headckid': ['ae5d83c90e7c8cd4'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['用户体验'], 'compTag': ['182'], 'ckid': ['ae5d83c90e7c8cd4'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['b034127dbb003a0d287008095ccd9c63'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['b034127dbb003a0d287008095ccd9c63']}, '制造业500强': {'init': ['-1'], 'headckid': ['ae5d83c90e7c8cd4'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['用户体验'], 'compTag': ['186'], 'ckid': ['ae5d83c90e7c8cd4'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['b034127dbb003a0d287008095ccd9c63'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['b034127dbb003a0d287008095ccd9c63']}, 'AI创新成长50强 ': {'init': ['-1'], 'headckid': ['ae5d83c90e7c8cd4'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['用户体验'], 'compTag': ['189'], 'ckid': ['ae5d83c90e7c8cd4'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['b034127dbb003a0d287008095ccd9c63'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['b034127dbb003a0d287008095ccd9c63']}, '独角兽': {'init': ['-1'], 'headckid': ['ae5d83c90e7c8cd4'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['用户体验'], 'compTag': ['130'], 'ckid': ['ae5d83c90e7c8cd4'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['b034127dbb003a0d287008095ccd9c63'], 'd_curPage': ['0'], 'd_pageSize': 
['40'], 'd_headId': ['b034127dbb003a0d287008095ccd9c63']}, '上市公司': {'init': ['-1'], 'headckid': ['ae5d83c90e7c8cd4'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['用户体验'], 'compTag': ['156'], 'ckid': ['ae5d83c90e7c8cd4'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['b034127dbb003a0d287008095ccd9c63'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['b034127dbb003a0d287008095ccd9c63']}}\n"
     ]
    }
   ],
   "source": [
    "# Generate one set of query parameters per company tag from the template\n",
    "def 参数模板生成(compTag , keyword ):\n",
    "    \"\"\"Return a shallow copy of 参数模板 with 'compTag' and 'keyword' replaced.\n",
    "\n",
    "    Args:\n",
    "        compTag: value stored under the 'compTag' key.\n",
    "        keyword: value stored under the 'keyword' key.\n",
    "\n",
    "    Returns:\n",
    "        A new dict; the module-level 参数模板 itself is left untouched.\n",
    "    \"\"\"\n",
    "    参数 = dict(参数模板)\n",
    "    参数.update(compTag=compTag, keyword=keyword)\n",
    "    return 参数\n",
    "\n",
    "# Set the keyword for every company tag; values are wrapped in lists like the template's own values\n",
    "参数_compTag_用户体验 = {\n",
    "    标签: 参数模板生成(compTag=[编号], keyword=['用户体验'])\n",
    "    for 标签, 编号 in 字典_compTag.items()\n",
    "}\n",
    "print(参数_compTag_用户体验)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Request one search-result page and parse it into a DataFrame\n",
    "session = HTMLSession()\n",
    "\n",
    "def requests_liepin( url, params):\n",
    "    \"\"\"Fetch one search-result page and extract its job listings.\n",
    "\n",
    "    Args:\n",
    "        url: address passed to session.get.\n",
    "        params: query parameters sent with the request.\n",
    "\n",
    "    Returns:\n",
    "        pandas.DataFrame with one row per <li> under ul.sojob-list and one\n",
    "        column per key of dict_xpaths (text_content, then text, then href).\n",
    "    \"\"\"\n",
    "    # Send the caller's params; the previous code passed `payload`, a name that is\n",
    "    # not defined in this function, so the argument was ignored.\n",
    "    r = session.get(url, params=params)\n",
    "\n",
    "    # Select each listing first, then evaluate every xpath below relative to it\n",
    "    主要元素 = r.html.xpath('//ul[@class=\"sojob-list\"]/li')\n",
    "\n",
    "    # Column name -> xpath, grouped by the way the value is extracted\n",
    "    dict_xpaths = {\n",
    "        'text': {\n",
    "            'edu':      '//div[contains(@class,\"job-info\")]/p/span[@class=\"edu\"]',\n",
    "            '经验':      '//div[contains(@class,\"job-info\")]/p/span[@class=\"edu\"]/following-sibling::span',\n",
    "            '薪水':    '//div[contains(@class,\"job-info\")]/p/span[@class=\"text-warning\"]',\n",
    "            '时间':    '//div[contains(@class,\"job-info\")]/p/time/@title',\n",
    "            '职称':    '//div[contains(@class,\"job-info\")]/h3/a',\n",
    "            '行业': '//div[contains(@class,\"job-info\")]/p/a',\n",
    "            '公司名称': '//div[contains(@class,\"sojob-item-main\")]//p[@class=\"company-name\"]/a',\n",
    "        },\n",
    "        'text_content': {\n",
    "        },\n",
    "        'href': {\n",
    "            '链结':    '//div[contains(@class,\"job-info\")]/h3/a',\n",
    "            '公司URL': '//div[contains(@class,\"sojob-item-main\")]//p[@class=\"company-name\"]/a',\n",
    "        }\n",
    "    }\n",
    "\n",
    "    def get_e_text_content(_xpath_):\n",
    "        \"\"\"Text content of the first match per listing; empty string when nothing matches.\"\"\"\n",
    "        暂存结果 = []\n",
    "        for e in 主要元素:\n",
    "            匹配 = e.xpath(_xpath_)\n",
    "            暂存结果.append(匹配[0].lxml.text_content() if 匹配 else \"\")\n",
    "        return 暂存结果\n",
    "\n",
    "    def get_e_text(_xpath_):\n",
    "        \"\"\"Stripped text of all matches per listing, joined into one string.\"\"\"\n",
    "        # A match is either a plain string (e.g. the @title selector) or an element with .text\n",
    "        return [\n",
    "            \"\".join(x.strip() if isinstance(x, str) else x.text.strip() for x in e.xpath(_xpath_))\n",
    "            for e in 主要元素\n",
    "        ]\n",
    "\n",
    "    def get_e_href(_xpath_):\n",
    "        \"\"\"One absolute link of the first match per listing; empty string when there is none.\"\"\"\n",
    "        暂存结果 = []\n",
    "        for e in 主要元素:\n",
    "            # first=True gives None when nothing matches, so guard before reading links\n",
    "            首个匹配 = e.xpath(_xpath_, first=True)\n",
    "            链接 = list(首个匹配.absolute_links) if 首个匹配 is not None else []\n",
    "            暂存结果.append(链接[0] if 链接 else \"\")\n",
    "        return 暂存结果\n",
    "\n",
    "    # Build the columns in a fixed order: text_content, text, href\n",
    "    数据字典 = {k: get_e_text_content(v) for k, v in dict_xpaths['text_content'].items()}\n",
    "    数据字典.update({k: get_e_text(v) for k, v in dict_xpaths['text'].items()})\n",
    "    数据字典.update({k: get_e_href(v) for k, v in dict_xpaths['href'].items()})\n",
    "\n",
    "    数据 = pd.DataFrame(数据字典)\n",
    "    # Optional export: 数据.to_excel(\"20春_Web数据挖掘_week03_liepin.xlsx\", sheet_name=\"搜查结果\")\n",
    "    return 数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>edu</th>\n",
       "      <th>经验</th>\n",
       "      <th>薪水</th>\n",
       "      <th>时间</th>\n",
       "      <th>职称</th>\n",
       "      <th>行业</th>\n",
       "      <th>公司名称</th>\n",
       "      <th>链结</th>\n",
       "      <th>公司URL</th>\n",
       "      <th>热门公司类型</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>统招本科</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年04月23日</td>\n",
       "      <td>安装成本高级经理</td>\n",
       "      <td>深圳-南山区</td>\n",
       "      <td>禹洲地产股份有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1927695079.shtml</td>\n",
       "      <td>https://www.liepin.com/company/2245242/</td>\n",
       "      <td>中国500强</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>本科及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>25-35k·16薪</td>\n",
       "      <td>2020年04月23日</td>\n",
       "      <td>集团总部-云架构管理岗</td>\n",
       "      <td>广州</td>\n",
       "      <td>越秀集团</td>\n",
       "      <td>https://www.liepin.com/job/1927682669.shtml</td>\n",
       "      <td>https://www.liepin.com/company/4585715/</td>\n",
       "      <td>中国500强</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>本科及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>30-40k·12薪</td>\n",
       "      <td>2020年04月23日</td>\n",
       "      <td>商业集团-投资关系专业总监</td>\n",
       "      <td>上海</td>\n",
       "      <td>弘阳集团</td>\n",
       "      <td>https://www.liepin.com/job/1927599825.shtml</td>\n",
       "      <td>https://www.liepin.com/company/729252/</td>\n",
       "      <td>中国500强</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>统招本科</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年04月23日</td>\n",
       "      <td>工程管理总监/经理</td>\n",
       "      <td>深圳-南山区</td>\n",
       "      <td>禹洲地产股份有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1927104937.shtml</td>\n",
       "      <td>https://www.liepin.com/company/2245242/</td>\n",
       "      <td>中国500强</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>大专及以上</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>6-7k·13薪</td>\n",
       "      <td>2020年04月22日</td>\n",
       "      <td>人力资源专员（中关村）</td>\n",
       "      <td></td>\n",
       "      <td>中国国际技术智力合作有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1927674527.shtml</td>\n",
       "      <td>https://www.liepin.com/company/1233751/</td>\n",
       "      <td>中国500强</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>统招本科</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>16-41k·12薪</td>\n",
       "      <td>2020年04月14日</td>\n",
       "      <td>私人财富管家/私人银行理财经理</td>\n",
       "      <td>太原-三桥</td>\n",
       "      <td>山西证券</td>\n",
       "      <td>https://www.liepin.com/job/1923874779.shtml</td>\n",
       "      <td>https://www.liepin.com/company/8538316/</td>\n",
       "      <td>上市公司</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>统招本科</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>30-50k·12薪</td>\n",
       "      <td>2020年04月13日</td>\n",
       "      <td>集团总部投资总监</td>\n",
       "      <td>深圳-南山区</td>\n",
       "      <td>禹洲地产股份有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1927409715.shtml</td>\n",
       "      <td>https://www.liepin.com/company/2245242/</td>\n",
       "      <td>上市公司</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>本科及以上</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>15-25k·13薪</td>\n",
       "      <td>2020年04月13日</td>\n",
       "      <td>高级运维开发工程师</td>\n",
       "      <td>杭州</td>\n",
       "      <td>网易集团</td>\n",
       "      <td>https://www.liepin.com/job/1926674043.shtml</td>\n",
       "      <td>https://www.liepin.com/company/5964833/</td>\n",
       "      <td>上市公司</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>统招本科</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年04月10日</td>\n",
       "      <td>集团总部招采总监</td>\n",
       "      <td>深圳-南山区</td>\n",
       "      <td>禹洲地产股份有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1927352801.shtml</td>\n",
       "      <td>https://www.liepin.com/company/2245242/</td>\n",
       "      <td>上市公司</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>本科及以上</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>4-6k·12薪</td>\n",
       "      <td>2020年04月10日</td>\n",
       "      <td>猎头顾问（面试快）</td>\n",
       "      <td></td>\n",
       "      <td>猎聘</td>\n",
       "      <td>https://www.liepin.com/job/1925796945.shtml</td>\n",
       "      <td>https://www.liepin.com/company/7873694/</td>\n",
       "      <td>上市公司</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>240 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      edu     经验          薪水           时间               职称      行业  \\\n",
       "0    统招本科  5-10年          面议  2020年04月23日         安装成本高级经理  深圳-南山区   \n",
       "1   本科及以上  5-10年  25-35k·16薪  2020年04月23日      集团总部-云架构管理岗      广州   \n",
       "2   本科及以上  5-10年  30-40k·12薪  2020年04月23日    商业集团-投资关系专业总监      上海   \n",
       "3    统招本科  5-10年          面议  2020年04月23日        工程管理总监/经理  深圳-南山区   \n",
       "4   大专及以上   经验不限    6-7k·13薪  2020年04月22日      人力资源专员（中关村）           \n",
       "..    ...    ...         ...          ...              ...     ...   \n",
       "35   统招本科   3-5年  16-41k·12薪  2020年04月14日  私人财富管家/私人银行理财经理   太原-三桥   \n",
       "36   统招本科  5-10年  30-50k·12薪  2020年04月13日         集团总部投资总监  深圳-南山区   \n",
       "37  本科及以上   3-5年  15-25k·13薪  2020年04月13日        高级运维开发工程师      杭州   \n",
       "38   统招本科  5-10年          面议  2020年04月10日         集团总部招采总监  深圳-南山区   \n",
       "39  本科及以上   1-3年    4-6k·12薪  2020年04月10日        猎头顾问（面试快）           \n",
       "\n",
       "              公司名称                                           链结  \\\n",
       "0       禹洲地产股份有限公司  https://www.liepin.com/job/1927695079.shtml   \n",
       "1             越秀集团  https://www.liepin.com/job/1927682669.shtml   \n",
       "2             弘阳集团  https://www.liepin.com/job/1927599825.shtml   \n",
       "3       禹洲地产股份有限公司  https://www.liepin.com/job/1927104937.shtml   \n",
       "4   中国国际技术智力合作有限公司  https://www.liepin.com/job/1927674527.shtml   \n",
       "..             ...                                          ...   \n",
       "35            山西证券  https://www.liepin.com/job/1923874779.shtml   \n",
       "36      禹洲地产股份有限公司  https://www.liepin.com/job/1927409715.shtml   \n",
       "37            网易集团  https://www.liepin.com/job/1926674043.shtml   \n",
       "38      禹洲地产股份有限公司  https://www.liepin.com/job/1927352801.shtml   \n",
       "39              猎聘  https://www.liepin.com/job/1925796945.shtml   \n",
       "\n",
       "                                      公司URL  热门公司类型  \n",
       "0   https://www.liepin.com/company/2245242/  中国500强  \n",
       "1   https://www.liepin.com/company/4585715/  中国500强  \n",
       "2    https://www.liepin.com/company/729252/  中国500强  \n",
       "3   https://www.liepin.com/company/2245242/  中国500强  \n",
       "4   https://www.liepin.com/company/1233751/  中国500强  \n",
       "..                                      ...     ...  \n",
       "35  https://www.liepin.com/company/8538316/    上市公司  \n",
       "36  https://www.liepin.com/company/2245242/    上市公司  \n",
       "37  https://www.liepin.com/company/5964833/    上市公司  \n",
       "38  https://www.liepin.com/company/2245242/    上市公司  \n",
       "39  https://www.liepin.com/company/7873694/    上市公司  \n",
       "\n",
       "[240 rows x 10 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "url = \"https://www.liepin.com/zhaopin/\"\n",
    "\n",
    "list_df = list()\n",
    "for k,v in 参数_compTag_用户体验.items():\n",
    "    payload = v\n",
    "    df = requests_liepin( url, params = payload)\n",
    "    df = df.assign (热门公司类型 = k)    \n",
    "    list_df.append(df)\n",
    "\n",
    "df_all = pd.concat(list_df)\n",
    "df_all"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# C-4   输出\n",
    "df_all.to_excel(\"公司类型的数据爬取.xlsx\", sheet_name=\"搜查结果\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 行业的爬取"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'互联网/电商': '/zhaopin/?subIndustry=&init=-1&industryType=industry_01&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=040&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '游戏产业': '/zhaopin/?subIndustry=&init=-1&industryType=industry_01&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=420&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '计算机软件': '/zhaopin/?subIndustry=&init=-1&industryType=industry_01&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=010&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " 'IT服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_01&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=030&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '电子/芯片/半导体': '/zhaopin/?subIndustry=&init=-1&industryType=industry_02&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=050&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '通信业': '/zhaopin/?subIndustry=&init=-1&industryType=industry_02&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=060&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '计算机/网络设备': '/zhaopin/?subIndustry=&init=-1&industryType=industry_02&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=020&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '房地产/建筑': '/zhaopin/?subIndustry=&init=-1&industryType=industry_03&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=080&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '规划/设计/装潢': '/zhaopin/?subIndustry=&init=-1&industryType=industry_03&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=100&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '房地产服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_03&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=090&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '银行': '/zhaopin/?subIndustry=&init=-1&industryType=industry_04&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=130&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '保险': '/zhaopin/?subIndustry=&init=-1&industryType=industry_04&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=140&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '基金/证券/投资': '/zhaopin/?subIndustry=&init=-1&industryType=industry_04&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=150&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '会计/审计': '/zhaopin/?subIndustry=&init=-1&industryType=industry_04&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=430&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '信托/担保/拍卖': '/zhaopin/?subIndustry=&init=-1&industryType=industry_04&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=500&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '快消品': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=190&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '批发零售': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=240&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '服装纺织': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=200&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '家具/家电': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=210&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '办公设备': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=220&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '奢侈品/收藏品': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=460&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '珠宝/玩具/工艺品': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=470&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '汽车/摩托车': '/zhaopin/?subIndustry=&init=-1&industryType=industry_06&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=350&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '机械/机电/重工': '/zhaopin/?subIndustry=&init=-1&industryType=industry_06&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=360&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '印刷/包装/造纸': '/zhaopin/?subIndustry=&init=-1&industryType=industry_06&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=180&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '原材料加工': '/zhaopin/?subIndustry=&init=-1&industryType=industry_06&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=370&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '仪器/电气/自动化': '/zhaopin/?subIndustry=&init=-1&industryType=industry_06&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=340&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '制药/生物工程': '/zhaopin/?subIndustry=&init=-1&industryType=industry_10&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=270&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '医疗/保健/美容': '/zhaopin/?subIndustry=&init=-1&industryType=industry_10&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=280&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '医疗器械': '/zhaopin/?subIndustry=&init=-1&industryType=industry_10&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=290&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '能源/水利': '/zhaopin/?subIndustry=&init=-1&industryType=industry_11&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=330&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '石油/化工': '/zhaopin/?subIndustry=&init=-1&industryType=industry_11&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=310&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '采掘/冶炼/矿产': '/zhaopin/?subIndustry=&init=-1&industryType=industry_11&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=320&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '环保': '/zhaopin/?subIndustry=&init=-1&industryType=industry_11&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=300&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '新能源': '/zhaopin/?subIndustry=&init=-1&industryType=industry_11&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=490&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '专业服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=120&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '中介服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=110&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '外包服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=440&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '检测/认证': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=450&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '餐饮/酒旅/服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=230&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '文体娱乐': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=260&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '租赁服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=510&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '广告/市场/会展': '/zhaopin/?subIndustry=&init=-1&industryType=industry_08&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=070&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '影视文化': '/zhaopin/?subIndustry=&init=-1&industryType=industry_08&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=170&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '教育培训': '/zhaopin/?subIndustry=&init=-1&industryType=industry_08&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=380&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '交通/物流/运输': '/zhaopin/?subIndustry=&init=-1&industryType=industry_09&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=250&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '贸易/进出口': '/zhaopin/?subIndustry=&init=-1&industryType=industry_09&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=160&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '航空/航天': '/zhaopin/?subIndustry=&init=-1&industryType=industry_09&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=480&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '政务/公共服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_12&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=390&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '农林牧渔': '/zhaopin/?subIndustry=&init=-1&industryType=industry_12&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=410&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d',\n",
       " '其他行业': '/zhaopin/?subIndustry=&init=-1&industryType=industry_12&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=400&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d'}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 对行业的数据（链接、文本）抓取\n",
    "行业数据选择器链结 = r.html.xpath('//div[@data-selector=\"search-conditions\"]')[0] \\\n",
    "                    .xpath('//dt[@class=\"search-title\"]/following-sibling::dd')[1] \\\n",
    "                    .xpath('//div[contains(@class,\"sub-industry\")]/a')\n",
    "行业数据选择器链结\n",
    "\n",
    "行业数据选择器链结 = { x.xpath(\"a/text()\")[0]:x.xpath(\"a/@href\")[0] for x in 行业数据选择器链结}\n",
    "行业数据选择器链结"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 51 entries, 0 to 50\n",
      "Data columns (total 6 columns):\n",
      " #   Column    Non-Null Count  Dtype \n",
      "---  ------    --------------  ----- \n",
      " 0   scheme    51 non-null     object\n",
      " 1   netloc    51 non-null     object\n",
      " 2   path      51 non-null     object\n",
      " 3   params    51 non-null     object\n",
      " 4   query     51 non-null     object\n",
      " 5   fragment  51 non-null     object\n",
      "dtypes: object(6)\n",
      "memory usage: 2.5+ KB\n",
      "scheme       1\n",
      "netloc       1\n",
      "path         1\n",
      "params       1\n",
      "query       51\n",
      "fragment     1\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>scheme</th>\n",
       "      <th>netloc</th>\n",
       "      <th>path</th>\n",
       "      <th>params</th>\n",
       "      <th>query</th>\n",
       "      <th>fragment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>subIndustry=&amp;init=-1&amp;industryType=industry_01&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  scheme netloc       path params  \\\n",
       "0                /zhaopin/          \n",
       "\n",
       "                                               query fragment  \n",
       "0  subIndustry=&init=-1&industryType=industry_01&...           "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from urllib.parse import urlparse, parse_qs\n",
    "\n",
    "df = pd.DataFrame([ urlparse(x) for x in 行业数据选择器链结.values()])\n",
    "df.info()\n",
    "print(df.nunique())\n",
    "df.head(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "init              1\n",
      "industryType     12\n",
      "headckid          1\n",
      "flushckid         1\n",
      "fromSearchBtn     1\n",
      "industries       51\n",
      "keyword           1\n",
      "ckid              1\n",
      "siTag             1\n",
      "d_sfrom           1\n",
      "d_ckId            1\n",
      "d_curPage         1\n",
      "d_pageSize        1\n",
      "d_headId          1\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>init</th>\n",
       "      <th>industryType</th>\n",
       "      <th>headckid</th>\n",
       "      <th>flushckid</th>\n",
       "      <th>fromSearchBtn</th>\n",
       "      <th>industries</th>\n",
       "      <th>keyword</th>\n",
       "      <th>ckid</th>\n",
       "      <th>siTag</th>\n",
       "      <th>d_sfrom</th>\n",
       "      <th>d_ckId</th>\n",
       "      <th>d_curPage</th>\n",
       "      <th>d_pageSize</th>\n",
       "      <th>d_headId</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1</td>\n",
       "      <td>industry_01</td>\n",
       "      <td>a2c857af6f47c165</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>040</td>\n",
       "      <td>产品经理</td>\n",
       "      <td>a2c857af6f47c165</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>16345e20e424a274af0a22d92523195d</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>16345e20e424a274af0a22d92523195d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-1</td>\n",
       "      <td>industry_01</td>\n",
       "      <td>a2c857af6f47c165</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>420</td>\n",
       "      <td>产品经理</td>\n",
       "      <td>a2c857af6f47c165</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>16345e20e424a274af0a22d92523195d</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>16345e20e424a274af0a22d92523195d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-1</td>\n",
       "      <td>industry_01</td>\n",
       "      <td>a2c857af6f47c165</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>010</td>\n",
       "      <td>产品经理</td>\n",
       "      <td>a2c857af6f47c165</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>16345e20e424a274af0a22d92523195d</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>16345e20e424a274af0a22d92523195d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-1</td>\n",
       "      <td>industry_01</td>\n",
       "      <td>a2c857af6f47c165</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>030</td>\n",
       "      <td>产品经理</td>\n",
       "      <td>a2c857af6f47c165</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>16345e20e424a274af0a22d92523195d</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>16345e20e424a274af0a22d92523195d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-1</td>\n",
       "      <td>industry_02</td>\n",
       "      <td>a2c857af6f47c165</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>050</td>\n",
       "      <td>产品经理</td>\n",
       "      <td>a2c857af6f47c165</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>16345e20e424a274af0a22d92523195d</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>16345e20e424a274af0a22d92523195d</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  init industryType          headckid flushckid fromSearchBtn industries  \\\n",
       "0   -1  industry_01  a2c857af6f47c165         1             2        040   \n",
       "1   -1  industry_01  a2c857af6f47c165         1             2        420   \n",
       "2   -1  industry_01  a2c857af6f47c165         1             2        010   \n",
       "3   -1  industry_01  a2c857af6f47c165         1             2        030   \n",
       "4   -1  industry_02  a2c857af6f47c165         1             2        050   \n",
       "\n",
       "  keyword              ckid                                          siTag  \\\n",
       "0    产品经理  a2c857af6f47c165  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw   \n",
       "1    产品经理  a2c857af6f47c165  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw   \n",
       "2    产品经理  a2c857af6f47c165  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw   \n",
       "3    产品经理  a2c857af6f47c165  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw   \n",
       "4    产品经理  a2c857af6f47c165  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw   \n",
       "\n",
       "          d_sfrom                            d_ckId d_curPage d_pageSize  \\\n",
       "0  search_unknown  16345e20e424a274af0a22d92523195d         0         40   \n",
       "1  search_unknown  16345e20e424a274af0a22d92523195d         0         40   \n",
       "2  search_unknown  16345e20e424a274af0a22d92523195d         0         40   \n",
       "3  search_unknown  16345e20e424a274af0a22d92523195d         0         40   \n",
       "4  search_unknown  16345e20e424a274af0a22d92523195d         0         40   \n",
       "\n",
       "                           d_headId  \n",
       "0  16345e20e424a274af0a22d92523195d  \n",
       "1  16345e20e424a274af0a22d92523195d  \n",
       "2  16345e20e424a274af0a22d92523195d  \n",
       "3  16345e20e424a274af0a22d92523195d  \n",
       "4  16345e20e424a274af0a22d92523195d  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 针对query 再解析之 \n",
    "#df_qs = pd.DataFrame([ parse_qs(x) for x in df['query'] ])\n",
    "df_qs = pd.DataFrame([{k:v[0] for k,v in parse_qs(x).items()} for x in df['query'] ])\n",
    "print (df_qs.nunique())\n",
    "df_qs.head()\n",
    "# df_qs[['keyword','industries']]  取表格中特定的某个值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['040'], 'keyword': ['产品经理'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}\n",
      "{'互联网/电商': '040', '游戏产业': '420', '计算机软件': '010', 'IT服务': '030', '电子/芯片/半导体': '050', '通信业': '060', '计算机/网络设备': '020', '房地产/建筑': '080', '规划/设计/装潢': '100', '房地产服务': '090', '银行': '130', '保险': '140', '基金/证券/投资': '150', '会计/审计': '430', '信托/担保/拍卖': '500', '快消品': '190', '批发零售': '240', '服装纺织': '200', '家具/家电': '210', '办公设备': '220', '奢侈品/收藏品': '460', '珠宝/玩具/工艺品': '470', '汽车/摩托车': '350', '机械/机电/重工': '360', '印刷/包装/造纸': '180', '原材料加工': '370', '仪器/电气/自动化': '340', '制药/生物工程': '270', '医疗/保健/美容': '280', '医疗器械': '290', '能源/水利': '330', '石油/化工': '310', '采掘/冶炼/矿产': '320', '环保': '300', '新能源': '490', '专业服务': '120', '中介服务': '110', '外包服务': '440', '检测/认证': '450', '餐饮/酒旅/服务': '230', '文体娱乐': '260', '租赁服务': '510', '广告/市场/会展': '070', '影视文化': '170', '教育培训': '380', '交通/物流/运输': '250', '贸易/进出口': '160', '航空/航天': '480', '政务/公共服务': '390', '农林牧渔': '410', '其他行业': '400'}\n"
     ]
    }
   ],
   "source": [
    "# 建构 参数模板 及 字典_industries\n",
    "def parse_url_qs_for_industries (url):\n",
    "    six_parts = urlparse(url) \n",
    "    out = parse_qs(six_parts.query)\n",
    "    return (out)\n",
    "\n",
    "# parse_url_qs_for_compTag(list(公司数据选择器链结.values())[0])['compTag']\n",
    "参数模板 = parse_url_qs_for_industries(list(行业数据选择器链结.values())[0])\n",
    "print(参数模板)\n",
    "# [ parse_url_qs_for_compTag(x)['compTag'] for x in 公司数据选择器链结.values()]\n",
    "[ parse_url_qs_for_industries(x)['industries'][0] for x in 行业数据选择器链结.values()]\n",
    "\n",
    "字典_industries = { k:parse_url_qs_for_industries(v)['industries'][0] for k,v in 行业数据选择器链结.items()}\n",
    "print (字典_industries)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'互联网/电商': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['040'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '游戏产业': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['420'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '计算机软件': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['010'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, 'IT服务': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['030'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '电子/芯片/半导体': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['050'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 
'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '通信业': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['060'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '计算机/网络设备': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['020'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '房地产/建筑': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['080'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '规划/设计/装潢': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['100'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': 
['16345e20e424a274af0a22d92523195d']}, '房地产服务': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['090'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '银行': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['130'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '保险': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['140'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '基金/证券/投资': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['150'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '会计/审计': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['430'], 'keyword': ['用户体验'], 'ckid': 
['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '信托/担保/拍卖': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['500'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '快消品': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['190'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '批发零售': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['240'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '服装纺织': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['200'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': 
['16345e20e424a274af0a22d92523195d']}, '家具/家电': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['210'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '办公设备': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['220'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '奢侈品/收藏品': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['460'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '珠宝/玩具/工艺品': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['470'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '汽车/摩托车': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['350'], 'keyword': ['用户体验'], 
'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '机械/机电/重工': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['360'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '印刷/包装/造纸': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['180'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '原材料加工': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['370'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '仪器/电气/自动化': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['340'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 
'd_headId': ['16345e20e424a274af0a22d92523195d']}, '制药/生物工程': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['270'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '医疗/保健/美容': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['280'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '医疗器械': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['290'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '能源/水利': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['330'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '石油/化工': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['310'], 'keyword': 
['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '采掘/冶炼/矿产': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['320'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '环保': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['300'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '新能源': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['490'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '专业服务': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['120'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 
'd_headId': ['16345e20e424a274af0a22d92523195d']}, '中介服务': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['110'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '外包服务': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['440'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '检测/认证': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['450'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '餐饮/酒旅/服务': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['230'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '文体娱乐': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['260'], 'keyword': 
['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '租赁服务': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['510'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '广告/市场/会展': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['070'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '影视文化': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['170'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '教育培训': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['380'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 
'd_headId': ['16345e20e424a274af0a22d92523195d']}, '交通/物流/运输': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['250'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '贸易/进出口': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['160'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '航空/航天': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['480'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '政务/公共服务': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['390'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '农林牧渔': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['410'], 'keyword': 
['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}, '其他行业': {'init': ['-1'], 'industryType': ['industry_01'], 'headckid': ['a2c857af6f47c165'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'industries': ['400'], 'keyword': ['用户体验'], 'ckid': ['a2c857af6f47c165'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['16345e20e424a274af0a22d92523195d'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['16345e20e424a274af0a22d92523195d']}}"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "def 参数模板生成(industries , keyword ):\n",
    "    参数 = 参数模板.copy()\n",
    "    参数['industries'] = industries\n",
    "    参数['keyword'] = keyword\n",
    "    return (参数)\n",
    "\n",
    "参数_industries_用户体验 = { k:参数模板生成(industries = [v], keyword = ['用户体验']) for k,v in 字典_industries.items()}\n",
    "print(参数_industries_用户体验)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'https://www.liepin.com/zhaopin/?init=-1&industryType=industry_01&headckid=a2c857af6f47c165&flushckid=1&fromSearchBtn=2&industries=040&keyword=%E7%94%A8%E6%88%B7%E4%BD%93%E9%AA%8C&ckid=a2c857af6f47c165&siTag=1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=16345e20e424a274af0a22d92523195d&d_curPage=0&d_pageSize=40&d_headId=16345e20e424a274af0a22d92523195d'"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 尝试取其中一个url 查看参数模版是否建构成功\n",
    "url = \"https://www.liepin.com/zhaopin/\"\n",
    "session = HTMLSession()\n",
    "payload = 参数_industries_用户体验['互联网/电商']\n",
    "r = session.get( url, params = payload)\n",
    "r.url"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "session = HTMLSession()\n",
    "\n",
    "def requests_liepin( url, params):\n",
    "    r = session.get( url , params = payload)\n",
    "\n",
    "    # 先取特定元素, 精准打击其子后辈\n",
    "    主要元素 = r.html.xpath( '//ul[@class=\"sojob-list\"]/li')\n",
    "\n",
    "    # 作为xpath字典，键为我要抓的牛肉名称，值为xpath\n",
    "    dict_xpaths={ \n",
    "        'text': {\n",
    "            'edu':      '//div[contains(@class,\"job-info\")]/p/span[@class=\"edu\"]',\n",
    "            '经验':      '//div[contains(@class,\"job-info\")]/p/span[@class=\"edu\"]/following-sibling::span',\n",
    "            '薪水':    '//div[contains(@class,\"job-info\")]/p/span[@class=\"text-warning\"]', \n",
    "            '时间':    '//div[contains(@class,\"job-info\")]/p/time/@title', \n",
    "            '职称':    '//div[contains(@class,\"job-info\")]/h3/a', \n",
    "            '行业': '//div[contains(@class,\"job-info\")]/p/a',\n",
    "            '公司名称': '//div[contains(@class,\"sojob-item-main\")]//p[@class=\"company-name\"]/a', \n",
    "        },\n",
    "        'text_content': {\n",
    "        },\n",
    "        'href': {\n",
    "            '链结':    '//div[contains(@class,\"job-info\")]/h3/a', \n",
    "            '公司URL': '//div[contains(@class,\"sojob-item-main\")]//p[@class=\"company-name\"]/a', \n",
    "        }\n",
    "    }\n",
    "\n",
    "    def get_e_text_content(_xpath_):\n",
    "        # 高级列表推导\n",
    "        暂存结果 = [e.xpath(_xpath_)[0].lxml.text_content() for e in 主要元素]\n",
    "        return(暂存结果)\n",
    "\n",
    "    def get_e_text(_xpath_):\n",
    "        # 高级列表推导\n",
    "        暂存结果 = [\"\".join([x.strip() if type(x) is str else x.text.strip() for x in e.xpath(_xpath_)]) for e in 主要元素]\n",
    "        return(暂存结果)\n",
    "\n",
    "    def get_e_href(_xpath_):\n",
    "        # 高级列表推导\n",
    "        暂存结果 = [list(e.xpath(_xpath_, first=True).absolute_links)[0] \\\n",
    "                   if len(e.xpath(_xpath_, first=True).absolute_links) >= 1  \\\n",
    "                   else \"\" for e in 主要元素]\n",
    "        return(暂存结果)\n",
    "\n",
    "    # 只对主要元素下进行.xpath取值\n",
    "    数据字典 = dict()\n",
    "\n",
    "    数据字典 = {k:get_e_text_content(v) for k,v in dict_xpaths['text_content'].items()}\n",
    "    数据字典.update({k:get_e_text(v) for k,v in dict_xpaths['text'].items()})\n",
    "    数据字典.update({k:get_e_href(v) for k,v in dict_xpaths['href'].items()})\n",
    "\n",
    "    数据 = pd.DataFrame(数据字典)\n",
    "    #数据.to_excel(\"20春_Web数据挖掘_week03_liepin.xlsx\", sheet_name=\"搜查结果\")\n",
    "    return (数据)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>edu</th>\n",
       "      <th>经验</th>\n",
       "      <th>薪水</th>\n",
       "      <th>时间</th>\n",
       "      <th>职称</th>\n",
       "      <th>行业</th>\n",
       "      <th>公司名称</th>\n",
       "      <th>链结</th>\n",
       "      <th>公司URL</th>\n",
       "      <th>热门行业分类</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>统招本科</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>10-15k·12薪</td>\n",
       "      <td>2020年04月24日</td>\n",
       "      <td>策划主管/经理</td>\n",
       "      <td>广州</td>\n",
       "      <td>广州谷雨生物科技有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1927746037.shtml</td>\n",
       "      <td>https://www.liepin.com/company/12197905/</td>\n",
       "      <td>互联网/电商</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>本科及以上</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>10-20k·13薪</td>\n",
       "      <td>2020年04月24日</td>\n",
       "      <td>客服主管</td>\n",
       "      <td>深圳-南山区</td>\n",
       "      <td>泽金金服</td>\n",
       "      <td>https://www.liepin.com/job/1927746007.shtml</td>\n",
       "      <td>https://www.liepin.com/company/8787574/</td>\n",
       "      <td>互联网/电商</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>统招本科</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>10-15k·14薪</td>\n",
       "      <td>2020年04月24日</td>\n",
       "      <td>运维工程师(J10609)</td>\n",
       "      <td>广州-番禺区</td>\n",
       "      <td>有米科技</td>\n",
       "      <td>https://www.liepin.com/job/1927745611.shtml</td>\n",
       "      <td>https://www.liepin.com/company/7863254/</td>\n",
       "      <td>互联网/电商</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>统招本科</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>15-25k·12薪</td>\n",
       "      <td>2020年04月24日</td>\n",
       "      <td>董事长助理</td>\n",
       "      <td>广州</td>\n",
       "      <td>泉后集团</td>\n",
       "      <td>https://www.liepin.com/job/1927743139.shtml</td>\n",
       "      <td>https://www.liepin.com/company/10269965/</td>\n",
       "      <td>互联网/电商</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>大专及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>20-40k·14薪</td>\n",
       "      <td>2020年04月24日</td>\n",
       "      <td>美妆运营总监</td>\n",
       "      <td>杭州</td>\n",
       "      <td>蘑菇街</td>\n",
       "      <td>https://www.liepin.com/job/1927738411.shtml</td>\n",
       "      <td>https://www.liepin.com/company/3824406/</td>\n",
       "      <td>互联网/电商</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>硕士及以上</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>10-20k·13薪</td>\n",
       "      <td>2020年04月24日</td>\n",
       "      <td>上位机软件工程师</td>\n",
       "      <td>重庆-渝北区</td>\n",
       "      <td>北京理工大学重庆创新中心</td>\n",
       "      <td>https://www.liepin.com/job/1927746867.shtml</td>\n",
       "      <td>https://www.liepin.com/company/10137173/</td>\n",
       "      <td>其他行业</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>本科及以上</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>6-12k·13薪</td>\n",
       "      <td>2020年04月24日</td>\n",
       "      <td>系统测试工程师</td>\n",
       "      <td>重庆-渝北区</td>\n",
       "      <td>北京理工大学重庆创新中心</td>\n",
       "      <td>https://www.liepin.com/job/1927746353.shtml</td>\n",
       "      <td>https://www.liepin.com/company/10137173/</td>\n",
       "      <td>其他行业</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>大专及以上</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>10-15k·12薪</td>\n",
       "      <td>2020年04月24日</td>\n",
       "      <td>莞建公司技术岗主管</td>\n",
       "      <td>东莞</td>\n",
       "      <td>东莞实业集团</td>\n",
       "      <td>https://www.liepin.com/job/1927746261.shtml</td>\n",
       "      <td>https://www.liepin.com/company/5254322/</td>\n",
       "      <td>其他行业</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>本科及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>16-25k·12薪</td>\n",
       "      <td>2020年04月24日</td>\n",
       "      <td>莞建公司项目管理部副总监</td>\n",
       "      <td>东莞</td>\n",
       "      <td>东莞实业集团</td>\n",
       "      <td>https://www.liepin.com/job/1927745899.shtml</td>\n",
       "      <td>https://www.liepin.com/company/5254322/</td>\n",
       "      <td>其他行业</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>本科及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>16-25k·12薪</td>\n",
       "      <td>2020年04月24日</td>\n",
       "      <td>莞建公司项目管理部总监</td>\n",
       "      <td>东莞</td>\n",
       "      <td>东莞实业集团</td>\n",
       "      <td>https://www.liepin.com/job/1927745777.shtml</td>\n",
       "      <td>https://www.liepin.com/company/5254322/</td>\n",
       "      <td>其他行业</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2040 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      edu     经验          薪水           时间             职称      行业  \\\n",
       "0    统招本科   3-5年  10-15k·12薪  2020年04月24日        策划主管/经理      广州   \n",
       "1   本科及以上   3-5年  10-20k·13薪  2020年04月24日           客服主管  深圳-南山区   \n",
       "2    统招本科   1-3年  10-15k·14薪  2020年04月24日  运维工程师(J10609)  广州-番禺区   \n",
       "3    统招本科   3-5年  15-25k·12薪  2020年04月24日          董事长助理      广州   \n",
       "4   大专及以上  5-10年  20-40k·14薪  2020年04月24日         美妆运营总监      杭州   \n",
       "..    ...    ...         ...          ...            ...     ...   \n",
       "35  硕士及以上   3-5年  10-20k·13薪  2020年04月24日       上位机软件工程师  重庆-渝北区   \n",
       "36  本科及以上   1-3年   6-12k·13薪  2020年04月24日        系统测试工程师  重庆-渝北区   \n",
       "37  大专及以上   3-5年  10-15k·12薪  2020年04月24日      莞建公司技术岗主管      东莞   \n",
       "38  本科及以上  5-10年  16-25k·12薪  2020年04月24日   莞建公司项目管理部副总监      东莞   \n",
       "39  本科及以上  5-10年  16-25k·12薪  2020年04月24日    莞建公司项目管理部总监      东莞   \n",
       "\n",
       "            公司名称                                           链结  \\\n",
       "0   广州谷雨生物科技有限公司  https://www.liepin.com/job/1927746037.shtml   \n",
       "1           泽金金服  https://www.liepin.com/job/1927746007.shtml   \n",
       "2           有米科技  https://www.liepin.com/job/1927745611.shtml   \n",
       "3           泉后集团  https://www.liepin.com/job/1927743139.shtml   \n",
       "4            蘑菇街  https://www.liepin.com/job/1927738411.shtml   \n",
       "..           ...                                          ...   \n",
       "35  北京理工大学重庆创新中心  https://www.liepin.com/job/1927746867.shtml   \n",
       "36  北京理工大学重庆创新中心  https://www.liepin.com/job/1927746353.shtml   \n",
       "37        东莞实业集团  https://www.liepin.com/job/1927746261.shtml   \n",
       "38        东莞实业集团  https://www.liepin.com/job/1927745899.shtml   \n",
       "39        东莞实业集团  https://www.liepin.com/job/1927745777.shtml   \n",
       "\n",
       "                                       公司URL  热门行业分类  \n",
       "0   https://www.liepin.com/company/12197905/  互联网/电商  \n",
       "1    https://www.liepin.com/company/8787574/  互联网/电商  \n",
       "2    https://www.liepin.com/company/7863254/  互联网/电商  \n",
       "3   https://www.liepin.com/company/10269965/  互联网/电商  \n",
       "4    https://www.liepin.com/company/3824406/  互联网/电商  \n",
       "..                                       ...     ...  \n",
       "35  https://www.liepin.com/company/10137173/    其他行业  \n",
       "36  https://www.liepin.com/company/10137173/    其他行业  \n",
       "37   https://www.liepin.com/company/5254322/    其他行业  \n",
       "38   https://www.liepin.com/company/5254322/    其他行业  \n",
       "39   https://www.liepin.com/company/5254322/    其他行业  \n",
       "\n",
       "[2040 rows x 10 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "url = \"https://www.liepin.com/zhaopin/\"\n",
    "\n",
    "list_df = list()\n",
    "for k,v in 参数_industries_用户体验.items():\n",
    "    payload = v\n",
    "    df = requests_liepin( url, params = payload)\n",
    "    df = df.assign (热门行业分类 = k)    \n",
    "    list_df.append(df)\n",
    "\n",
    "df_all = pd.concat(list_df)\n",
    "df_all"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# C-4   输出\n",
    "df_all.to_excel(\"行业类型的数据爬取.xlsx\", sheet_name=\"搜查结果\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 城市的爬取 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'全国': '/zhaopin/?init=-1&headckid=339f66b497b256e9&flushckid=1&fromSearchBtn=2&dqs=&keyword=%E7%94%A8%E6%88%B7%E7%A0%94%E7%A9%B6&ckid=339f66b497b256e9&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=afa446921de14f892c9c68b4998a7347&d_curPage=0&d_pageSize=40&d_headId=afa446921de14f892c9c68b4998a7347',\n",
       " '北京': '/zhaopin/?init=-1&headckid=339f66b497b256e9&flushckid=1&fromSearchBtn=2&dqs=010&keyword=%E7%94%A8%E6%88%B7%E7%A0%94%E7%A9%B6&ckid=339f66b497b256e9&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=afa446921de14f892c9c68b4998a7347&d_curPage=0&d_pageSize=40&d_headId=afa446921de14f892c9c68b4998a7347',\n",
       " '上海': '/zhaopin/?init=-1&headckid=339f66b497b256e9&flushckid=1&fromSearchBtn=2&dqs=020&keyword=%E7%94%A8%E6%88%B7%E7%A0%94%E7%A9%B6&ckid=339f66b497b256e9&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=afa446921de14f892c9c68b4998a7347&d_curPage=0&d_pageSize=40&d_headId=afa446921de14f892c9c68b4998a7347',\n",
       " '广州': '/zhaopin/?init=-1&headckid=339f66b497b256e9&flushckid=1&fromSearchBtn=2&dqs=050020&keyword=%E7%94%A8%E6%88%B7%E7%A0%94%E7%A9%B6&ckid=339f66b497b256e9&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=afa446921de14f892c9c68b4998a7347&d_curPage=0&d_pageSize=40&d_headId=afa446921de14f892c9c68b4998a7347',\n",
       " '深圳': '/zhaopin/?init=-1&headckid=339f66b497b256e9&flushckid=1&fromSearchBtn=2&dqs=050090&keyword=%E7%94%A8%E6%88%B7%E7%A0%94%E7%A9%B6&ckid=339f66b497b256e9&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=afa446921de14f892c9c68b4998a7347&d_curPage=0&d_pageSize=40&d_headId=afa446921de14f892c9c68b4998a7347',\n",
       " '天津': '/zhaopin/?init=-1&headckid=339f66b497b256e9&flushckid=1&fromSearchBtn=2&dqs=030&keyword=%E7%94%A8%E6%88%B7%E7%A0%94%E7%A9%B6&ckid=339f66b497b256e9&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=afa446921de14f892c9c68b4998a7347&d_curPage=0&d_pageSize=40&d_headId=afa446921de14f892c9c68b4998a7347',\n",
       " '苏州': '/zhaopin/?init=-1&headckid=339f66b497b256e9&flushckid=1&fromSearchBtn=2&dqs=060080&keyword=%E7%94%A8%E6%88%B7%E7%A0%94%E7%A9%B6&ckid=339f66b497b256e9&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=afa446921de14f892c9c68b4998a7347&d_curPage=0&d_pageSize=40&d_headId=afa446921de14f892c9c68b4998a7347',\n",
       " '重庆': '/zhaopin/?init=-1&headckid=339f66b497b256e9&flushckid=1&fromSearchBtn=2&dqs=040&keyword=%E7%94%A8%E6%88%B7%E7%A0%94%E7%A9%B6&ckid=339f66b497b256e9&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=afa446921de14f892c9c68b4998a7347&d_curPage=0&d_pageSize=40&d_headId=afa446921de14f892c9c68b4998a7347',\n",
       " '南京': '/zhaopin/?init=-1&headckid=339f66b497b256e9&flushckid=1&fromSearchBtn=2&dqs=060020&keyword=%E7%94%A8%E6%88%B7%E7%A0%94%E7%A9%B6&ckid=339f66b497b256e9&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=afa446921de14f892c9c68b4998a7347&d_curPage=0&d_pageSize=40&d_headId=afa446921de14f892c9c68b4998a7347',\n",
       " '杭州': '/zhaopin/?init=-1&headckid=339f66b497b256e9&flushckid=1&fromSearchBtn=2&dqs=070020&keyword=%E7%94%A8%E6%88%B7%E7%A0%94%E7%A9%B6&ckid=339f66b497b256e9&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=afa446921de14f892c9c68b4998a7347&d_curPage=0&d_pageSize=40&d_headId=afa446921de14f892c9c68b4998a7347',\n",
       " '大连': '/zhaopin/?init=-1&headckid=339f66b497b256e9&flushckid=1&fromSearchBtn=2&dqs=210040&keyword=%E7%94%A8%E6%88%B7%E7%A0%94%E7%A9%B6&ckid=339f66b497b256e9&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=afa446921de14f892c9c68b4998a7347&d_curPage=0&d_pageSize=40&d_headId=afa446921de14f892c9c68b4998a7347',\n",
       " '成都': '/zhaopin/?init=-1&headckid=339f66b497b256e9&flushckid=1&fromSearchBtn=2&dqs=280020&keyword=%E7%94%A8%E6%88%B7%E7%A0%94%E7%A9%B6&ckid=339f66b497b256e9&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=afa446921de14f892c9c68b4998a7347&d_curPage=0&d_pageSize=40&d_headId=afa446921de14f892c9c68b4998a7347',\n",
       " '武汉': '/zhaopin/?init=-1&headckid=339f66b497b256e9&flushckid=1&fromSearchBtn=2&dqs=170020&keyword=%E7%94%A8%E6%88%B7%E7%A0%94%E7%A9%B6&ckid=339f66b497b256e9&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=afa446921de14f892c9c68b4998a7347&d_curPage=0&d_pageSize=40&d_headId=afa446921de14f892c9c68b4998a7347',\n",
       " '其他': 'javascript:;'}"
      ]
     },
     "execution_count": 106,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 对城市分类的数据（链接、文本）抓取\n",
    "import pandas as pd\n",
    "from requests_html import HTMLSession\n",
    "\n",
    "url = \"https://www.liepin.com/zhaopin/?keyword=用户研究\"\n",
    "session = HTMLSession()\n",
    "r = session.get( url )\n",
    "\n",
    "城市链接 = r.html.xpath('//div[@data-selector=\"search-conditions\"]')[0] \\\n",
    "                    .xpath('//dt[@class=\"search-title\"]/following-sibling::dd')[2] \\\n",
    "                    .xpath('//a/@href')\n",
    "城市文本 = r.html.xpath('//div[@data-selector=\"search-conditions\"]')[0] \\\n",
    "                 .xpath('//dt[@class=\"search-title\"]/following-sibling::dd')[2] \\\n",
    "                  .xpath('//a/text()')\n",
    "\n",
    "城市数据选择器链结 = dict(zip(城市文本,城市链接))\n",
    "城市数据选择器链结"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 14 entries, 0 to 13\n",
      "Data columns (total 6 columns):\n",
      " #   Column    Non-Null Count  Dtype \n",
      "---  ------    --------------  ----- \n",
      " 0   scheme    14 non-null     object\n",
      " 1   netloc    14 non-null     object\n",
      " 2   path      14 non-null     object\n",
      " 3   params    14 non-null     object\n",
      " 4   query     14 non-null     object\n",
      " 5   fragment  14 non-null     object\n",
      "dtypes: object(6)\n",
      "memory usage: 800.0+ bytes\n",
      "scheme       2\n",
      "netloc       1\n",
      "path         2\n",
      "params       1\n",
      "query       14\n",
      "fragment     1\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>scheme</th>\n",
       "      <th>netloc</th>\n",
       "      <th>path</th>\n",
       "      <th>params</th>\n",
       "      <th>query</th>\n",
       "      <th>fragment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=339f66b497b256e9&amp;flushckid=1&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=339f66b497b256e9&amp;flushckid=1&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=339f66b497b256e9&amp;flushckid=1&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=339f66b497b256e9&amp;flushckid=1&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=339f66b497b256e9&amp;flushckid=1&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  scheme netloc       path params  \\\n",
       "0                /zhaopin/          \n",
       "1                /zhaopin/          \n",
       "2                /zhaopin/          \n",
       "3                /zhaopin/          \n",
       "4                /zhaopin/          \n",
       "\n",
       "                                               query fragment  \n",
       "0  init=-1&headckid=339f66b497b256e9&flushckid=1&...           \n",
       "1  init=-1&headckid=339f66b497b256e9&flushckid=1&...           \n",
       "2  init=-1&headckid=339f66b497b256e9&flushckid=1&...           \n",
       "3  init=-1&headckid=339f66b497b256e9&flushckid=1&...           \n",
       "4  init=-1&headckid=339f66b497b256e9&flushckid=1&...           "
      ]
     },
     "execution_count": 107,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 解析url\n",
    "from urllib.parse import urlparse, parse_qs\n",
    "\n",
    "df = pd.DataFrame([ urlparse(x) for x in 城市数据选择器链结.values()])\n",
    "df.info()\n",
    "print(df.nunique())\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "init              1\n",
      "headckid          1\n",
      "flushckid         1\n",
      "fromSearchBtn     1\n",
      "keyword           1\n",
      "ckid              1\n",
      "siTag             1\n",
      "d_sfrom           1\n",
      "d_ckId            1\n",
      "d_curPage         1\n",
      "d_pageSize        1\n",
      "d_headId          1\n",
      "dqs              12\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>init</th>\n",
       "      <th>headckid</th>\n",
       "      <th>flushckid</th>\n",
       "      <th>fromSearchBtn</th>\n",
       "      <th>keyword</th>\n",
       "      <th>ckid</th>\n",
       "      <th>siTag</th>\n",
       "      <th>d_sfrom</th>\n",
       "      <th>d_ckId</th>\n",
       "      <th>d_curPage</th>\n",
       "      <th>d_pageSize</th>\n",
       "      <th>d_headId</th>\n",
       "      <th>dqs</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>用户研究</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-1</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>用户研究</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-1</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>用户研究</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-1</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>用户研究</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>050020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-1</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>用户研究</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>050090</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>-1</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>用户研究</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>030</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>-1</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>用户研究</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>060080</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>-1</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>用户研究</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>-1</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>用户研究</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>060020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>-1</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>用户研究</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>070020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>-1</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>用户研究</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>210040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>-1</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>用户研究</td>\n",
       "      <td>339f66b497b256e9</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>afa446921de14f892c9c68b4998a7347</td>\n",
       "      <td>280020</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   init          headckid flushckid fromSearchBtn keyword              ckid  \\\n",
       "0    -1  339f66b497b256e9         1             2    用户研究  339f66b497b256e9   \n",
       "1    -1  339f66b497b256e9         1             2    用户研究  339f66b497b256e9   \n",
       "2    -1  339f66b497b256e9         1             2    用户研究  339f66b497b256e9   \n",
       "3    -1  339f66b497b256e9         1             2    用户研究  339f66b497b256e9   \n",
       "4    -1  339f66b497b256e9         1             2    用户研究  339f66b497b256e9   \n",
       "5    -1  339f66b497b256e9         1             2    用户研究  339f66b497b256e9   \n",
       "6    -1  339f66b497b256e9         1             2    用户研究  339f66b497b256e9   \n",
       "7    -1  339f66b497b256e9         1             2    用户研究  339f66b497b256e9   \n",
       "8    -1  339f66b497b256e9         1             2    用户研究  339f66b497b256e9   \n",
       "9    -1  339f66b497b256e9         1             2    用户研究  339f66b497b256e9   \n",
       "10   -1  339f66b497b256e9         1             2    用户研究  339f66b497b256e9   \n",
       "11   -1  339f66b497b256e9         1             2    用户研究  339f66b497b256e9   \n",
       "\n",
       "                                            siTag         d_sfrom  \\\n",
       "0   1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "1   1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "2   1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "3   1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "4   1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "5   1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "6   1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "7   1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "8   1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "9   1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "10  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "11  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "\n",
       "                              d_ckId d_curPage d_pageSize  \\\n",
       "0   afa446921de14f892c9c68b4998a7347         0         40   \n",
       "1   afa446921de14f892c9c68b4998a7347         0         40   \n",
       "2   afa446921de14f892c9c68b4998a7347         0         40   \n",
       "3   afa446921de14f892c9c68b4998a7347         0         40   \n",
       "4   afa446921de14f892c9c68b4998a7347         0         40   \n",
       "5   afa446921de14f892c9c68b4998a7347         0         40   \n",
       "6   afa446921de14f892c9c68b4998a7347         0         40   \n",
       "7   afa446921de14f892c9c68b4998a7347         0         40   \n",
       "8   afa446921de14f892c9c68b4998a7347         0         40   \n",
       "9   afa446921de14f892c9c68b4998a7347         0         40   \n",
       "10  afa446921de14f892c9c68b4998a7347         0         40   \n",
       "11  afa446921de14f892c9c68b4998a7347         0         40   \n",
       "\n",
       "                            d_headId     dqs  \n",
       "0   afa446921de14f892c9c68b4998a7347     NaN  \n",
       "1   afa446921de14f892c9c68b4998a7347     010  \n",
       "2   afa446921de14f892c9c68b4998a7347     020  \n",
       "3   afa446921de14f892c9c68b4998a7347  050020  \n",
       "4   afa446921de14f892c9c68b4998a7347  050090  \n",
       "5   afa446921de14f892c9c68b4998a7347     030  \n",
       "6   afa446921de14f892c9c68b4998a7347  060080  \n",
       "7   afa446921de14f892c9c68b4998a7347     040  \n",
       "8   afa446921de14f892c9c68b4998a7347  060020  \n",
       "9   afa446921de14f892c9c68b4998a7347  070020  \n",
       "10  afa446921de14f892c9c68b4998a7347  210040  \n",
       "11  afa446921de14f892c9c68b4998a7347  280020  "
      ]
     },
     "execution_count": 108,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# B-3 针对query 再解析之 \n",
    "#df_qs = pd.DataFrame([ parse_qs(x) for x in df['query'] ])\n",
    "df_qs = pd.DataFrame([{k:v[0] for k,v in parse_qs(x).items()} for x in df['query'] ])\n",
    "print (df_qs.nunique())\n",
    "df_qs.head(12)\n",
    "# df_qs[['keyword','industries']]  取表格中特定的某个值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>keyword</th>\n",
       "      <th>dqs</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>用户研究</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>用户研究</td>\n",
       "      <td>010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>用户研究</td>\n",
       "      <td>020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>用户研究</td>\n",
       "      <td>050020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>用户研究</td>\n",
       "      <td>050090</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>用户研究</td>\n",
       "      <td>030</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>用户研究</td>\n",
       "      <td>060080</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>用户研究</td>\n",
       "      <td>040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>用户研究</td>\n",
       "      <td>060020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>用户研究</td>\n",
       "      <td>070020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>用户研究</td>\n",
       "      <td>210040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>用户研究</td>\n",
       "      <td>280020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>用户研究</td>\n",
       "      <td>170020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   keyword     dqs\n",
       "0     用户研究     NaN\n",
       "1     用户研究     010\n",
       "2     用户研究     020\n",
       "3     用户研究  050020\n",
       "4     用户研究  050090\n",
       "5     用户研究     030\n",
       "6     用户研究  060080\n",
       "7     用户研究     040\n",
       "8     用户研究  060020\n",
       "9     用户研究  070020\n",
       "10    用户研究  210040\n",
       "11    用户研究  280020\n",
       "12    用户研究  170020\n",
       "13     NaN     NaN"
      ]
     },
     "execution_count": 109,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_qs[['keyword','dqs']] #取表格中特定的某个值"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 薪资的爬取"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'10-15万': '/zhaopin/?init=-1&salary=10$15&ckid=16fbac46101aa036&headckid=16fbac46101aa036&flushckid=1&fromSearchBtn=2&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=3c35314940ccd26bffc65066bb46a906&d_curPage=0&d_pageSize=40&d_headId=3c35314940ccd26bffc65066bb46a906',\n",
       " '15-20万': '/zhaopin/?init=-1&salary=15$20&ckid=16fbac46101aa036&headckid=16fbac46101aa036&flushckid=1&fromSearchBtn=2&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=3c35314940ccd26bffc65066bb46a906&d_curPage=0&d_pageSize=40&d_headId=3c35314940ccd26bffc65066bb46a906',\n",
       " '20-30万': '/zhaopin/?init=-1&salary=20$30&ckid=16fbac46101aa036&headckid=16fbac46101aa036&flushckid=1&fromSearchBtn=2&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=3c35314940ccd26bffc65066bb46a906&d_curPage=0&d_pageSize=40&d_headId=3c35314940ccd26bffc65066bb46a906',\n",
       " '30-50万': '/zhaopin/?init=-1&salary=30$50&ckid=16fbac46101aa036&headckid=16fbac46101aa036&flushckid=1&fromSearchBtn=2&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=3c35314940ccd26bffc65066bb46a906&d_curPage=0&d_pageSize=40&d_headId=3c35314940ccd26bffc65066bb46a906',\n",
       " '50-100万': '/zhaopin/?init=-1&salary=50$100&ckid=16fbac46101aa036&headckid=16fbac46101aa036&flushckid=1&fromSearchBtn=2&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=3c35314940ccd26bffc65066bb46a906&d_curPage=0&d_pageSize=40&d_headId=3c35314940ccd26bffc65066bb46a906',\n",
       " '100万以上': '/zhaopin/?init=-1&salary=100$999&ckid=16fbac46101aa036&headckid=16fbac46101aa036&flushckid=1&fromSearchBtn=2&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=3c35314940ccd26bffc65066bb46a906&d_curPage=0&d_pageSize=40&d_headId=3c35314940ccd26bffc65066bb46a906'}"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from requests_html import HTMLSession\n",
    "\n",
    "url = \"https://www.liepin.com/zhaopin?\"\n",
    "session = HTMLSession()\n",
    "r = session.get( url )\n",
    "\n",
    "薪资数据选择器链结 = r.html.xpath('//div[@data-selector=\"search-conditions\"]')[0] \\\n",
    "                    .xpath('//dt[@class=\"search-title\"]/following-sibling::dd')[3] \\\n",
    "                    .xpath('//dd[contains(@data-param,\"salary\")]/a')\n",
    "薪资数据选择器链结\n",
    "\n",
    "薪资数据选择器链结 = { x.xpath(\"a/text()\")[0]:x.xpath(\"a/@href\")[0] for x in 薪资数据选择器链结}\n",
    "薪资数据选择器链结"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 6 entries, 0 to 5\n",
      "Data columns (total 6 columns):\n",
      " #   Column    Non-Null Count  Dtype \n",
      "---  ------    --------------  ----- \n",
      " 0   scheme    6 non-null      object\n",
      " 1   netloc    6 non-null      object\n",
      " 2   path      6 non-null      object\n",
      " 3   params    6 non-null      object\n",
      " 4   query     6 non-null      object\n",
      " 5   fragment  6 non-null      object\n",
      "dtypes: object(6)\n",
      "memory usage: 416.0+ bytes\n",
      "scheme      1\n",
      "netloc      1\n",
      "path        1\n",
      "params      1\n",
      "query       6\n",
      "fragment    1\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>scheme</th>\n",
       "      <th>netloc</th>\n",
       "      <th>path</th>\n",
       "      <th>params</th>\n",
       "      <th>query</th>\n",
       "      <th>fragment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;salary=10$15&amp;ckid=16fbac46101aa036&amp;hea...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;salary=15$20&amp;ckid=16fbac46101aa036&amp;hea...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;salary=20$30&amp;ckid=16fbac46101aa036&amp;hea...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;salary=30$50&amp;ckid=16fbac46101aa036&amp;hea...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;salary=50$100&amp;ckid=16fbac46101aa036&amp;he...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  scheme netloc       path params  \\\n",
       "0                /zhaopin/          \n",
       "1                /zhaopin/          \n",
       "2                /zhaopin/          \n",
       "3                /zhaopin/          \n",
       "4                /zhaopin/          \n",
       "\n",
       "                                               query fragment  \n",
       "0  init=-1&salary=10$15&ckid=16fbac46101aa036&hea...           \n",
       "1  init=-1&salary=15$20&ckid=16fbac46101aa036&hea...           \n",
       "2  init=-1&salary=20$30&ckid=16fbac46101aa036&hea...           \n",
       "3  init=-1&salary=30$50&ckid=16fbac46101aa036&hea...           \n",
       "4  init=-1&salary=50$100&ckid=16fbac46101aa036&he...           "
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 解析url\n",
    "from urllib.parse import urlparse, parse_qs\n",
    "\n",
    "df = pd.DataFrame([ urlparse(x) for x in 薪资数据选择器链结.values()])\n",
    "df.info()\n",
    "print(df.nunique())\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "init             1\n",
      "salary           6\n",
      "ckid             1\n",
      "headckid         1\n",
      "flushckid        1\n",
      "fromSearchBtn    1\n",
      "siTag            1\n",
      "d_sfrom          1\n",
      "d_ckId           1\n",
      "d_curPage        1\n",
      "d_pageSize       1\n",
      "d_headId         1\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>init</th>\n",
       "      <th>salary</th>\n",
       "      <th>ckid</th>\n",
       "      <th>headckid</th>\n",
       "      <th>flushckid</th>\n",
       "      <th>fromSearchBtn</th>\n",
       "      <th>siTag</th>\n",
       "      <th>d_sfrom</th>\n",
       "      <th>d_ckId</th>\n",
       "      <th>d_curPage</th>\n",
       "      <th>d_pageSize</th>\n",
       "      <th>d_headId</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1</td>\n",
       "      <td>10$15</td>\n",
       "      <td>16fbac46101aa036</td>\n",
       "      <td>16fbac46101aa036</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>3c35314940ccd26bffc65066bb46a906</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>3c35314940ccd26bffc65066bb46a906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-1</td>\n",
       "      <td>15$20</td>\n",
       "      <td>16fbac46101aa036</td>\n",
       "      <td>16fbac46101aa036</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>3c35314940ccd26bffc65066bb46a906</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>3c35314940ccd26bffc65066bb46a906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-1</td>\n",
       "      <td>20$30</td>\n",
       "      <td>16fbac46101aa036</td>\n",
       "      <td>16fbac46101aa036</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>3c35314940ccd26bffc65066bb46a906</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>3c35314940ccd26bffc65066bb46a906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-1</td>\n",
       "      <td>30$50</td>\n",
       "      <td>16fbac46101aa036</td>\n",
       "      <td>16fbac46101aa036</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>3c35314940ccd26bffc65066bb46a906</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>3c35314940ccd26bffc65066bb46a906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-1</td>\n",
       "      <td>50$100</td>\n",
       "      <td>16fbac46101aa036</td>\n",
       "      <td>16fbac46101aa036</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>3c35314940ccd26bffc65066bb46a906</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>3c35314940ccd26bffc65066bb46a906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>-1</td>\n",
       "      <td>100$999</td>\n",
       "      <td>16fbac46101aa036</td>\n",
       "      <td>16fbac46101aa036</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>3c35314940ccd26bffc65066bb46a906</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>3c35314940ccd26bffc65066bb46a906</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  init   salary              ckid          headckid flushckid fromSearchBtn  \\\n",
       "0   -1    10$15  16fbac46101aa036  16fbac46101aa036         1             2   \n",
       "1   -1    15$20  16fbac46101aa036  16fbac46101aa036         1             2   \n",
       "2   -1    20$30  16fbac46101aa036  16fbac46101aa036         1             2   \n",
       "3   -1    30$50  16fbac46101aa036  16fbac46101aa036         1             2   \n",
       "4   -1   50$100  16fbac46101aa036  16fbac46101aa036         1             2   \n",
       "5   -1  100$999  16fbac46101aa036  16fbac46101aa036         1             2   \n",
       "\n",
       "                                           siTag         d_sfrom  \\\n",
       "0  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "1  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "2  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "3  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "4  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "5  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "\n",
       "                             d_ckId d_curPage d_pageSize  \\\n",
       "0  3c35314940ccd26bffc65066bb46a906         0         40   \n",
       "1  3c35314940ccd26bffc65066bb46a906         0         40   \n",
       "2  3c35314940ccd26bffc65066bb46a906         0         40   \n",
       "3  3c35314940ccd26bffc65066bb46a906         0         40   \n",
       "4  3c35314940ccd26bffc65066bb46a906         0         40   \n",
       "5  3c35314940ccd26bffc65066bb46a906         0         40   \n",
       "\n",
       "                           d_headId  \n",
       "0  3c35314940ccd26bffc65066bb46a906  \n",
       "1  3c35314940ccd26bffc65066bb46a906  \n",
       "2  3c35314940ccd26bffc65066bb46a906  \n",
       "3  3c35314940ccd26bffc65066bb46a906  \n",
       "4  3c35314940ccd26bffc65066bb46a906  \n",
       "5  3c35314940ccd26bffc65066bb46a906  "
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# B-3 针对query 再解析之 \n",
    "#df_qs = pd.DataFrame([ parse_qs(x) for x in df['query'] ])\n",
    "df_qs = pd.DataFrame([{k:v[0] for k,v in parse_qs(x).items()} for x in df['query'] ])\n",
    "print (df_qs.nunique())\n",
    "df_qs.head(12)\n",
    "# df_qs[['keyword','industries']]  取表格中特定的某个值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'init': ['-1'], 'salary': ['10$15'], 'ckid': ['16fbac46101aa036'], 'headckid': ['16fbac46101aa036'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['3c35314940ccd26bffc65066bb46a906'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['3c35314940ccd26bffc65066bb46a906']}\n",
      "{'10-15万': '10$15', '15-20万': '15$20', '20-30万': '20$30', '30-50万': '30$50', '50-100万': '50$100', '100万以上': '100$999'}\n"
     ]
    }
   ],
   "source": [
    "# 建构 参数模板 及 字典_salary\n",
    "def parse_url_qs_for_salary (url):\n",
    "    six_parts = urlparse(url) \n",
    "    out = parse_qs(six_parts.query)\n",
    "    return (out)\n",
    "\n",
    "# parse_url_qs_for_compTag(list(公司数据选择器链结.values())[0])['compTag']\n",
    "参数模板 = parse_url_qs_for_salary(list(薪资数据选择器链结.values())[0])\n",
    "print(参数模板)\n",
    "# [ parse_url_qs_for_compTag(x)['compTag'] for x in 公司数据选择器链结.values()]\n",
    "[ parse_url_qs_for_salary(x)['salary'][0] for x in 薪资数据选择器链结.values()]\n",
    "\n",
    "字典_salary = { k:parse_url_qs_for_salary(v)['salary'][0] for k,v in 薪资数据选择器链结.items()}\n",
    "print(字典_salary)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'10-15万': {'init': ['-1'], 'salary': ['10$15'], 'ckid': ['16fbac46101aa036'], 'headckid': ['16fbac46101aa036'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['3c35314940ccd26bffc65066bb46a906'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['3c35314940ccd26bffc65066bb46a906'], 'keyword': ['用户体验']}, '15-20万': {'init': ['-1'], 'salary': ['15$20'], 'ckid': ['16fbac46101aa036'], 'headckid': ['16fbac46101aa036'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['3c35314940ccd26bffc65066bb46a906'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['3c35314940ccd26bffc65066bb46a906'], 'keyword': ['用户体验']}, '20-30万': {'init': ['-1'], 'salary': ['20$30'], 'ckid': ['16fbac46101aa036'], 'headckid': ['16fbac46101aa036'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['3c35314940ccd26bffc65066bb46a906'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['3c35314940ccd26bffc65066bb46a906'], 'keyword': ['用户体验']}, '30-50万': {'init': ['-1'], 'salary': ['30$50'], 'ckid': ['16fbac46101aa036'], 'headckid': ['16fbac46101aa036'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['3c35314940ccd26bffc65066bb46a906'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['3c35314940ccd26bffc65066bb46a906'], 'keyword': ['用户体验']}, '50-100万': {'init': ['-1'], 'salary': ['50$100'], 'ckid': ['16fbac46101aa036'], 'headckid': ['16fbac46101aa036'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['3c35314940ccd26bffc65066bb46a906'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': 
['3c35314940ccd26bffc65066bb46a906'], 'keyword': ['用户体验']}, '100万以上': {'init': ['-1'], 'salary': ['100$999'], 'ckid': ['16fbac46101aa036'], 'headckid': ['16fbac46101aa036'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['3c35314940ccd26bffc65066bb46a906'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['3c35314940ccd26bffc65066bb46a906'], 'keyword': ['用户体验']}}\n"
     ]
    }
   ],
   "source": [
    "# 生成城市参数模板  \n",
    "def 参数模板生成(salary , keyword ):\n",
    "    参数 = 参数模板.copy()  # 复制一份参数模版\n",
    "    参数['salary'] = salary\n",
    "    参数['keyword'] = keyword\n",
    "    return (参数)\n",
    "\n",
    "# 赋值：keyword\n",
    "参数_salary_用户体验 = { k:参数模板生成(salary = [v], keyword = ['用户体验']) for k,v in 字典_salary.items()}\n",
    "print(参数_salary_用户体验)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [],
   "source": [
    "# request请求\n",
    "session = HTMLSession()\n",
    "\n",
    "def requests_liepin( url, params):\n",
    "    r = session.get( url , params = payload)\n",
    "\n",
    "    # 先取特定元素, 精准打击其子后辈\n",
    "    主要元素 = r.html.xpath( '//ul[@class=\"sojob-list\"]/li')\n",
    "\n",
    "    # 作为xpath字典，键为我要抓的牛肉名称，值为xpath\n",
    "    dict_xpaths={ \n",
    "        'text': {\n",
    "            'edu':      '//div[contains(@class,\"job-info\")]/p/span[@class=\"edu\"]',\n",
    "            '经验':      '//div[contains(@class,\"job-info\")]/p/span[@class=\"edu\"]/following-sibling::span',\n",
    "            '薪水':    '//div[contains(@class,\"job-info\")]/p/span[@class=\"text-warning\"]', \n",
    "            '时间':    '//div[contains(@class,\"job-info\")]/p/time/@title', \n",
    "            '职称':    '//div[contains(@class,\"job-info\")]/h3/a', \n",
    "            '行业': '//div[contains(@class,\"job-info\")]/p/a',\n",
    "            '公司名称': '//div[contains(@class,\"sojob-item-main\")]//p[@class=\"company-name\"]/a', \n",
    "        },\n",
    "        'text_content': {\n",
    "        },\n",
    "        'href': {\n",
    "            '链结':    '//div[contains(@class,\"job-info\")]/h3/a', \n",
    "            '公司URL': '//div[contains(@class,\"sojob-item-main\")]//p[@class=\"company-name\"]/a', \n",
    "        }\n",
    "    }\n",
    "\n",
    "    def get_e_text_content(_xpath_):\n",
    "        # 高级列表推导\n",
    "        暂存结果 = [e.xpath(_xpath_)[0].lxml.text_content() for e in 主要元素]\n",
    "        return(暂存结果)\n",
    "\n",
    "    def get_e_text(_xpath_):\n",
    "        # 高级列表推导\n",
    "        暂存结果 = [\"\".join([x.strip() if type(x) is str else x.text.strip() for x in e.xpath(_xpath_)]) for e in 主要元素]\n",
    "        return(暂存结果)\n",
    "\n",
    "    def get_e_href(_xpath_):\n",
    "        # 高级列表推导\n",
    "        暂存结果 = [list(e.xpath(_xpath_, first=True).absolute_links)[0] \\\n",
    "                   if len(e.xpath(_xpath_, first=True).absolute_links) >= 1  \\\n",
    "                   else \"\" for e in 主要元素]\n",
    "        return(暂存结果)\n",
    "\n",
    "    # 只对主要元素下进行.xpath取值\n",
    "    数据字典 = dict()\n",
    "\n",
    "    数据字典 = {k:get_e_text_content(v) for k,v in dict_xpaths['text_content'].items()}\n",
    "    数据字典.update({k:get_e_text(v) for k,v in dict_xpaths['text'].items()})\n",
    "    数据字典.update({k:get_e_href(v) for k,v in dict_xpaths['href'].items()})\n",
    "\n",
    "    数据 = pd.DataFrame(数据字典)\n",
    "    #数据.to_excel(\"20春_Web数据挖掘_week03_liepin.xlsx\", sheet_name=\"搜查结果\")\n",
    "    return (数据)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>edu</th>\n",
       "      <th>经验</th>\n",
       "      <th>薪水</th>\n",
       "      <th>时间</th>\n",
       "      <th>职称</th>\n",
       "      <th>行业</th>\n",
       "      <th>公司名称</th>\n",
       "      <th>链结</th>\n",
       "      <th>公司URL</th>\n",
       "      <th>薪资水平</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>大专及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>8-12k·12薪</td>\n",
       "      <td>2020年04月24日</td>\n",
       "      <td>薪酬主管</td>\n",
       "      <td>上海</td>\n",
       "      <td>北京万景百年室内设计有限公司上海分公司</td>\n",
       "      <td>https://www.liepin.com/job/1927749527.shtml</td>\n",
       "      <td>https://www.liepin.com/company/8960550/</td>\n",
       "      <td>10-15万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>大专及以上</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>6-15k·12薪</td>\n",
       "      <td>2020年04月24日</td>\n",
       "      <td>模具设计工程师（密封-新能源）</td>\n",
       "      <td>广州-黄埔区</td>\n",
       "      <td>国机智能</td>\n",
       "      <td>https://www.liepin.com/job/1927749185.shtml</td>\n",
       "      <td>https://www.liepin.com/company/8942684/</td>\n",
       "      <td>10-15万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>大专及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>10-12k·12薪</td>\n",
       "      <td>2020年04月24日</td>\n",
       "      <td>安全主管</td>\n",
       "      <td>成都</td>\n",
       "      <td>四川辉瑞建设有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1927748777.shtml</td>\n",
       "      <td>https://www.liepin.com/company/10166071/</td>\n",
       "      <td>10-15万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>统招本科</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>8-12k·12薪</td>\n",
       "      <td>2020年04月24日</td>\n",
       "      <td>文化宣传（翻译）</td>\n",
       "      <td>济南</td>\n",
       "      <td>浪潮</td>\n",
       "      <td>https://www.liepin.com/job/1927748471.shtml</td>\n",
       "      <td>https://www.liepin.com/company/7893220/</td>\n",
       "      <td>10-15万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>统招本科</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>10-15k·12薪</td>\n",
       "      <td>2020年04月24日</td>\n",
       "      <td>品牌活动线上推广经理</td>\n",
       "      <td>济南</td>\n",
       "      <td>浪潮</td>\n",
       "      <td>https://www.liepin.com/job/1927748201.shtml</td>\n",
       "      <td>https://www.liepin.com/company/7893220/</td>\n",
       "      <td>10-15万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>统招本科</td>\n",
       "      <td>10年以上</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年04月24日</td>\n",
       "      <td>公有云高端销售岗位</td>\n",
       "      <td></td>\n",
       "      <td>紫光云技术有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1927738781.shtml</td>\n",
       "      <td>https://www.liepin.com/company/9691729/</td>\n",
       "      <td>100万以上</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>本科及以上</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年04月24日</td>\n",
       "      <td>TA Head （Group）</td>\n",
       "      <td>深圳</td>\n",
       "      <td>米高蒲志国际(香港)有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1927738359.shtml</td>\n",
       "      <td>https://www.liepin.com/company/1790705/</td>\n",
       "      <td>100万以上</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>本科及以上</td>\n",
       "      <td>10年以上</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年04月24日</td>\n",
       "      <td>创新技术产品规划总监</td>\n",
       "      <td>北京</td>\n",
       "      <td>京东数字科技控股有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1927738123.shtml</td>\n",
       "      <td>https://www.liepin.com/company/215535/</td>\n",
       "      <td>100万以上</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>本科及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>50-83k·12薪</td>\n",
       "      <td>2020年04月24日</td>\n",
       "      <td>WebRTC音视频研发专家</td>\n",
       "      <td>北京</td>\n",
       "      <td>360</td>\n",
       "      <td>https://www.liepin.com/job/1927736647.shtml</td>\n",
       "      <td>https://www.liepin.com/company/6429309/</td>\n",
       "      <td>100万以上</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>本科及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年04月24日</td>\n",
       "      <td>Senior Manager, AppleCare Business Development...</td>\n",
       "      <td>上海</td>\n",
       "      <td>苹果</td>\n",
       "      <td>https://www.liepin.com/job/1927735951.shtml</td>\n",
       "      <td>https://www.liepin.com/company/8356233/</td>\n",
       "      <td>100万以上</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>240 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      edu     经验          薪水           时间  \\\n",
       "0   大专及以上  5-10年   8-12k·12薪  2020年04月24日   \n",
       "1   大专及以上   1-3年   6-15k·12薪  2020年04月24日   \n",
       "2   大专及以上  5-10年  10-12k·12薪  2020年04月24日   \n",
       "3    统招本科   3-5年   8-12k·12薪  2020年04月24日   \n",
       "4    统招本科   3-5年  10-15k·12薪  2020年04月24日   \n",
       "..    ...    ...         ...          ...   \n",
       "35   统招本科  10年以上          面议  2020年04月24日   \n",
       "36  本科及以上   经验不限          面议  2020年04月24日   \n",
       "37  本科及以上  10年以上          面议  2020年04月24日   \n",
       "38  本科及以上  5-10年  50-83k·12薪  2020年04月24日   \n",
       "39  本科及以上  5-10年          面议  2020年04月24日   \n",
       "\n",
       "                                                   职称      行业  \\\n",
       "0                                                薪酬主管      上海   \n",
       "1                                     模具设计工程师（密封-新能源）  广州-黄埔区   \n",
       "2                                                安全主管      成都   \n",
       "3                                            文化宣传（翻译）      济南   \n",
       "4                                          品牌活动线上推广经理      济南   \n",
       "..                                                ...     ...   \n",
       "35                                          公有云高端销售岗位           \n",
       "36                                    TA Head （Group）      深圳   \n",
       "37                                         创新技术产品规划总监      北京   \n",
       "38                                      WebRTC音视频研发专家      北京   \n",
       "39  Senior Manager, AppleCare Business Development...      上海   \n",
       "\n",
       "                   公司名称                                           链结  \\\n",
       "0   北京万景百年室内设计有限公司上海分公司  https://www.liepin.com/job/1927749527.shtml   \n",
       "1                  国机智能  https://www.liepin.com/job/1927749185.shtml   \n",
       "2            四川辉瑞建设有限公司  https://www.liepin.com/job/1927748777.shtml   \n",
       "3                    浪潮  https://www.liepin.com/job/1927748471.shtml   \n",
       "4                    浪潮  https://www.liepin.com/job/1927748201.shtml   \n",
       "..                  ...                                          ...   \n",
       "35            紫光云技术有限公司  https://www.liepin.com/job/1927738781.shtml   \n",
       "36       米高蒲志国际(香港)有限公司  https://www.liepin.com/job/1927738359.shtml   \n",
       "37         京东数字科技控股有限公司  https://www.liepin.com/job/1927738123.shtml   \n",
       "38                  360  https://www.liepin.com/job/1927736647.shtml   \n",
       "39                   苹果  https://www.liepin.com/job/1927735951.shtml   \n",
       "\n",
       "                                       公司URL    薪资水平  \n",
       "0    https://www.liepin.com/company/8960550/  10-15万  \n",
       "1    https://www.liepin.com/company/8942684/  10-15万  \n",
       "2   https://www.liepin.com/company/10166071/  10-15万  \n",
       "3    https://www.liepin.com/company/7893220/  10-15万  \n",
       "4    https://www.liepin.com/company/7893220/  10-15万  \n",
       "..                                       ...     ...  \n",
       "35   https://www.liepin.com/company/9691729/  100万以上  \n",
       "36   https://www.liepin.com/company/1790705/  100万以上  \n",
       "37    https://www.liepin.com/company/215535/  100万以上  \n",
       "38   https://www.liepin.com/company/6429309/  100万以上  \n",
       "39   https://www.liepin.com/company/8356233/  100万以上  \n",
       "\n",
       "[240 rows x 10 columns]"
      ]
     },
     "execution_count": 89,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Crawl one result page per salary band and tag every row with its band label\n",
    "url = \"https://www.liepin.com/zhaopin/\"\n",
    "\n",
    "list_df = list()\n",
    "for k,v in 参数_salary_用户体验.items():\n",
    "    # k becomes the 薪资水平 label below; v is passed on as the query parameters\n",
    "    payload = v\n",
    "    df = requests_liepin( url, params = payload)\n",
    "    df = df.assign (薪资水平 = k)    \n",
    "    list_df.append(df)\n",
    "\n",
    "# Stack the per-band frames; no index reset, so each frame keeps its own row index\n",
    "df_all = pd.concat(list_df)\n",
    "df_all"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [],
   "source": [
    "# C-4  Export: write the combined salary-band results to an Excel workbook\n",
    "df_all.to_excel(excel_writer=\"薪资类型的数据爬取.xlsx\", sheet_name=\"搜查结果\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 翻页模版构建："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[<Element 'a' href='/zhaopin/?init=-1&headckid=e6378ea044d5e521&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=e6378ea044d5e521°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=56d438b88a4e3924579aa8bf30fad9b1&d_curPage=0&d_pageSize=40&d_headId=56d438b88a4e3924579aa8bf30fad9b1&curPage=1'>, <Element 'a' href='/zhaopin/?init=-1&headckid=e6378ea044d5e521&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=e6378ea044d5e521°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=56d438b88a4e3924579aa8bf30fad9b1&d_curPage=0&d_pageSize=40&d_headId=56d438b88a4e3924579aa8bf30fad9b1&curPage=2'>, <Element 'a' href='/zhaopin/?init=-1&headckid=e6378ea044d5e521&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=e6378ea044d5e521°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=56d438b88a4e3924579aa8bf30fad9b1&d_curPage=0&d_pageSize=40&d_headId=56d438b88a4e3924579aa8bf30fad9b1&curPage=3'>, <Element 'a' href='/zhaopin/?init=-1&headckid=e6378ea044d5e521&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=e6378ea044d5e521°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=56d438b88a4e3924579aa8bf30fad9b1&d_curPage=0&d_pageSize=40&d_headId=56d438b88a4e3924579aa8bf30fad9b1&curPage=4'>, <Element 'a' href='/zhaopin/?init=-1&headckid=e6378ea044d5e521&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=e6378ea044d5e521°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=56d438b88a4e3924579aa8bf30fad9b1&d_curPage=0&d_pageSize=40&d_headId=56d438b88a4e3924579aa8bf30fad9b1&curPage=1'>, <Element 'a' class=('last',) 
href='/zhaopin/?init=-1&headckid=e6378ea044d5e521&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=e6378ea044d5e521°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=56d438b88a4e3924579aa8bf30fad9b1&d_curPage=0&d_pageSize=40&d_headId=56d438b88a4e3924579aa8bf30fad9b1&curPage=9' title='末页'>]\n"
     ]
    }
   ],
   "source": [
    "# Pagination: fetch the first result page and locate the pager links with XPath\n",
    "url = \"https://www.liepin.com/zhaopin/?keyword=产品经理\"\n",
    "session = HTMLSession()\n",
    "r = session.get( url )\n",
    "# A-1  Select the pager <a> elements.\n",
    "# A bare '//div[@class=\"pagerbar\"]/a' also matches the disabled/current links,\n",
    "# whose href is javascript, so only hrefs that start with /zhaopin are kept.\n",
    "xpath_翻页a = '//div[@class=\"pagerbar\"]/a[starts-with(@href,\"/zhaopin\")]'\n",
    "print(r.html.xpath(xpath_翻页a))  # Element objects"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['/zhaopin/?init=-1&headckid=e6378ea044d5e521&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=e6378ea044d5e521°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=56d438b88a4e3924579aa8bf30fad9b1&d_curPage=0&d_pageSize=40&d_headId=56d438b88a4e3924579aa8bf30fad9b1&curPage=1', '/zhaopin/?init=-1&headckid=e6378ea044d5e521&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=e6378ea044d5e521°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=56d438b88a4e3924579aa8bf30fad9b1&d_curPage=0&d_pageSize=40&d_headId=56d438b88a4e3924579aa8bf30fad9b1&curPage=2', '/zhaopin/?init=-1&headckid=e6378ea044d5e521&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=e6378ea044d5e521°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=56d438b88a4e3924579aa8bf30fad9b1&d_curPage=0&d_pageSize=40&d_headId=56d438b88a4e3924579aa8bf30fad9b1&curPage=3', '/zhaopin/?init=-1&headckid=e6378ea044d5e521&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=e6378ea044d5e521°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=56d438b88a4e3924579aa8bf30fad9b1&d_curPage=0&d_pageSize=40&d_headId=56d438b88a4e3924579aa8bf30fad9b1&curPage=4', '/zhaopin/?init=-1&headckid=e6378ea044d5e521&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=e6378ea044d5e521°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=56d438b88a4e3924579aa8bf30fad9b1&d_curPage=0&d_pageSize=40&d_headId=56d438b88a4e3924579aa8bf30fad9b1&curPage=1', 
'/zhaopin/?init=-1&headckid=e6378ea044d5e521&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=e6378ea044d5e521°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=56d438b88a4e3924579aa8bf30fad9b1&d_curPage=0&d_pageSize=40&d_headId=56d438b88a4e3924579aa8bf30fad9b1&curPage=9']\n"
     ]
    }
   ],
   "source": [
    "# Reduce each pager <a> element from the previous step to its href string\n",
    "href_列表 = list(map(lambda a: a.xpath('//@href')[0], r.html.xpath(xpath_翻页a)))\n",
    "print(href_列表)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'2': '/zhaopin/?init=-1&headckid=e6378ea044d5e521&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=e6378ea044d5e521°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=56d438b88a4e3924579aa8bf30fad9b1&d_curPage=0&d_pageSize=40&d_headId=56d438b88a4e3924579aa8bf30fad9b1&curPage=1', '3': '/zhaopin/?init=-1&headckid=e6378ea044d5e521&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=e6378ea044d5e521°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=56d438b88a4e3924579aa8bf30fad9b1&d_curPage=0&d_pageSize=40&d_headId=56d438b88a4e3924579aa8bf30fad9b1&curPage=2', '4': '/zhaopin/?init=-1&headckid=e6378ea044d5e521&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=e6378ea044d5e521°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=56d438b88a4e3924579aa8bf30fad9b1&d_curPage=0&d_pageSize=40&d_headId=56d438b88a4e3924579aa8bf30fad9b1&curPage=3', '5': '/zhaopin/?init=-1&headckid=e6378ea044d5e521&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=e6378ea044d5e521°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=56d438b88a4e3924579aa8bf30fad9b1&d_curPage=0&d_pageSize=40&d_headId=56d438b88a4e3924579aa8bf30fad9b1&curPage=4', '下一页': '/zhaopin/?init=-1&headckid=e6378ea044d5e521&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=e6378ea044d5e521°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=56d438b88a4e3924579aa8bf30fad9b1&d_curPage=0&d_pageSize=40&d_headId=56d438b88a4e3924579aa8bf30fad9b1&curPage=1', '': 
'/zhaopin/?init=-1&headckid=e6378ea044d5e521&fromSearchBtn=2&keyword=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86&ckid=e6378ea044d5e521°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=56d438b88a4e3924579aa8bf30fad9b1&d_curPage=0&d_pageSize=40&d_headId=56d438b88a4e3924579aa8bf30fad9b1&curPage=9'}\n"
     ]
    }
   ],
   "source": [
    "# Map each pager link's label to its href.\n",
    "# The last-page link carries no text (only title='末页'), so fall back to the\n",
    "# title attribute instead of filing it under an empty-string key.\n",
    "href_字典 = {(x.text or x.attrs.get('title', '')): x.xpath('//@href')[0]  for x in r.html.xpath(xpath_翻页a)}\n",
    "print (href_字典)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {},
   "outputs": [],
   "source": [
    "session = HTMLSession()\n",
    "\n",
    "def requests_liepin( url, params=None):\n",
    "    \"\"\"Fetch one liepin search-result page and return its job cards as a DataFrame.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    url : str\n",
    "        Search URL to request.\n",
    "    params : dict, optional\n",
    "        Query-string parameters forwarded to ``session.get``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        One row per ``<li>`` under ``ul.sojob-list`` and one column per key of\n",
    "        ``dict_xpaths``. A field whose XPath matches nothing inside a card is\n",
    "        stored as an empty string.\n",
    "    \"\"\"\n",
    "    # Send the caller's params; a module-level `payload` must not leak into the request\n",
    "    r = session.get( url , params = params)\n",
    "\n",
    "    # Narrow down to the job cards first, then query inside each card\n",
    "    主要元素 = r.html.xpath( '//ul[@class=\"sojob-list\"]/li')\n",
    "\n",
    "    # Column name -> XPath, grouped by how the value is extracted\n",
    "    dict_xpaths={ \n",
    "        'text': {\n",
    "            'edu':      '//div[contains(@class,\"job-info\")]/p/span[@class=\"edu\"]',\n",
    "            '经验':      '//div[contains(@class,\"job-info\")]/p/span[@class=\"edu\"]/following-sibling::span',\n",
    "            '薪水':    '//div[contains(@class,\"job-info\")]/p/span[@class=\"text-warning\"]', \n",
    "            '时间':    '//div[contains(@class,\"job-info\")]/p/time/@title', \n",
    "            '职称':    '//div[contains(@class,\"job-info\")]/h3/a', \n",
    "            '公司地点': '//div[contains(@class,\"job-info\")]/p/a',\n",
    "            '公司名称': '//div[contains(@class,\"sojob-item-main\")]//p[@class=\"company-name\"]/a', \n",
    "        },\n",
    "        'text_content': {\n",
    "        },\n",
    "        'href': {\n",
    "            '链结':    '//div[contains(@class,\"job-info\")]/h3/a', \n",
    "            '公司URL': '//div[contains(@class,\"sojob-item-main\")]//p[@class=\"company-name\"]/a', \n",
    "        }\n",
    "    }\n",
    "\n",
    "    def get_e_text_content(_xpath_):\n",
    "        # Full text content of the first match in each card (empty string if no match)\n",
    "        暂存结果 = []\n",
    "        for e in 主要元素:\n",
    "            匹配 = e.xpath(_xpath_)\n",
    "            暂存结果.append(匹配[0].lxml.text_content() if 匹配 else \"\")\n",
    "        return(暂存结果)\n",
    "\n",
    "    def get_e_text(_xpath_):\n",
    "        # Stripped text of every match in each card, concatenated. Attribute\n",
    "        # XPaths (e.g. .../@title) yield plain strings rather than elements.\n",
    "        暂存结果 = [\"\".join([x.strip() if isinstance(x, str) else x.text.strip() for x in e.xpath(_xpath_)]) for e in 主要元素]\n",
    "        return(暂存结果)\n",
    "\n",
    "    def get_e_href(_xpath_):\n",
    "        # First absolute link of the first match in each card (empty string if none).\n",
    "        # xpath(..., first=True) returns None when nothing matches, so guard it.\n",
    "        暂存结果 = []\n",
    "        for e in 主要元素:\n",
    "            匹配 = e.xpath(_xpath_, first=True)\n",
    "            链接 = list(匹配.absolute_links) if 匹配 is not None else []\n",
    "            暂存结果.append(链接[0] if 链接 else \"\")\n",
    "        return(暂存结果)\n",
    "\n",
    "    # Evaluate every XPath relative to the job cards only\n",
    "    数据字典 = {k:get_e_text_content(v) for k,v in dict_xpaths['text_content'].items()}\n",
    "    数据字典.update({k:get_e_text(v) for k,v in dict_xpaths['text'].items()})\n",
    "    数据字典.update({k:get_e_href(v) for k,v in dict_xpaths['href'].items()})\n",
    "\n",
    "    数据 = pd.DataFrame(数据字典)\n",
    "    return (数据)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "from random import random\n",
    "\n",
    "#%%time\n",
    "# Pause for 3-7 s: a 3 s base plus up to 4 s of random jitter\n",
    "暂停秒数 = 3 + 4 * random()\n",
    "time.sleep(暂停秒数)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "产品经理 10\n",
      "UX 10\n",
      "新媒体运营 10\n",
      "交互设计 10\n"
     ]
    }
   ],
   "source": [
    "#%%time\n",
    "# B-3  Crawl every result page for several keywords\n",
    "import time\n",
    "from random import random\n",
    "\n",
    "url = \"https://www.liepin.com/zhaopin/\"\n",
    "xpath_翻页a = '//div[@class=\"pagerbar\"]/a[starts-with(@href,\"/zhaopin\")]'\n",
    "\n",
    "keywords = ['产品经理','UX','新媒体运营','交互设计']\n",
    "list_df = list()\n",
    "\n",
    "for key in keywords:\n",
    "    ## Probe the first page of THIS keyword to learn how many pages it has.\n",
    "    ## The pager has to be read from this keyword's own response: reusing an\n",
    "    ## `r` left over from an earlier cell yields the same page count for\n",
    "    ## every keyword.\n",
    "    payload = 参数模板生成(keyword=[key], curPage=['0'])\n",
    "    r = session.get( url, params = payload)\n",
    "    href_列表 = [x.xpath('//@href')[0] for x in r.html.xpath(xpath_翻页a)]\n",
    "    # curPage is zero-based in the pager hrefs, so page count = max(curPage) + 1;\n",
    "    # a result set without pager links is treated as a single page\n",
    "    页码列表 = [\n",
    "        int(qs['curPage'][0])\n",
    "        for qs in (parse_qs(urlparse(x).query) for x in href_列表)\n",
    "        if 'curPage' in qs\n",
    "    ]\n",
    "    长度 = max(页码列表) + 1 if 页码列表 else 1\n",
    "    参数_keyword_X_curPage = {\n",
    "        i:参数模板生成(curPage = [i], keyword = [key])\n",
    "        for i in range(0,长度)\n",
    "        }\n",
    "    print (key,长度)\n",
    "    \n",
    "    for k,v in 参数_keyword_X_curPage.items():\n",
    "        payload = v\n",
    "        df = requests_liepin( url, params = payload)\n",
    "        time.sleep(3+4*random())  # slow down: 3-7 s per page, about 5 s on average\n",
    "        df = df.assign (keyword = key)  # tag rows with their keyword\n",
    "        df = df.assign (curPage = k)  # tag rows with their page number\n",
    "        list_df.append(df)\n",
    "        \n",
    "df_all = pd.concat(list_df).reset_index()\n",
    "df_all.index.name = '序'\n",
    "\n",
    "df_all.to_excel(\"产品经理_UX_新媒体运营_交互设计_翻页数据.xlsx\",\n",
    "                sheet_name=\"_\".join(keywords))\n",
    "# Rough estimate for 4 keywords x 10 pages at ~5 s per page: about 200 s\n",
    "# Rough row count at 40 rows per page: up to about 1600"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
